예제 #1
0
파일: run.py 프로젝트: barichd/MetaStorm
def get_tree():
    """Return the stored taxonomy tree of one sample as a JSON response.

    Reads ``pid``, ``uid``, ``sid``, ``value``, ``pip`` and ``rid`` from the
    JSON request body, loads the tree from the sample's result JSON file
    (``rf.json``) and answers with ``tree``, ``range`` and ``pip``.

    Returns:
        The ``jsonify`` response on success, or the plain string
        ``"ERROR: <message>"`` when anything fails (nothing is raised).
    """
    try:
        data = request.get_json()
        pid = data["pid"]
        uid = data["uid"]  # not used below; kept so a missing key is still reported
        sid = data["sid"]
        selval = data["value"]
        pipeline = data["pip"]
        dbname = data["rid"]
        analysis = "taxonomy"
        rf = rootvar.result_files(pid, analysis, pipeline, sid, dbname)

        x = sql.SQL(main_db)
        try:
            # NOTE(review): the query is built by concatenating request
            # values; switch to bound parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid+'" and sample_id="'+sid+'"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            # Release the handle even when the lookup fails; the outer
            # handler turns the error into a string, so nobody else would.
            x.close()

        with open(rf.json) as data_file:
            tree = json.load(data_file)

        view = rootvar.ViewSampleResults(
            sample, pipeline, dbname, analysis, rf)
        return jsonify(tree=[tree], range=view.range(selval)[0], pip=pipeline)
    except Exception as inst:
        return "ERROR: "+str(inst)
예제 #2
0
파일: run.py 프로젝트: barichd/MetaStorm
def get_taxo_by_name():
    """Return the abundance matrix of one taxonomy level for one sample.

    Reads ``pid``, ``uid``, ``sid``, ``tid``, ``lid``, ``pip`` and ``rid``
    from the JSON request body and answers with ``matrix``, the result of
    ``view.level(lid, type="matches")``.

    Returns:
        The ``jsonify`` response, or ``"ERROR: <message>"`` when building the
        matrix fails.  Errors raised while reading the request or looking up
        the sample are not caught here and propagate to the caller.
    """
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]  # not used below; the lookup still validates the request
    sid = data["sid"]
    tid = data["tid"]  # not used below; the lookup still validates the request
    lid = data["lid"]
    pipeline = data["pip"]
    dbname = data["rid"]
    analysis = "taxonomy"

    x = sql.SQL(main_db)
    try:
        rf = rootvar.result_files(pid, analysis, pipeline, sid, dbname)
        # NOTE(review): the query is built by concatenating request values;
        # switch to bound parameters if sql.SQL.exe supports them.
        samples = x.exe('select * from samples where project_id="' +
                        pid+'" and sample_id="'+sid+'"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
    finally:
        # Close the handle even if the sample lookup raises.
        x.close()

    view = rootvar.ViewSampleResults(sample, pipeline, dbname, analysis, rf)
    try:
        return jsonify(matrix=view.level(lid, type="matches"))
    except Exception as inst:
        return "ERROR: "+str(inst)
예제 #3
0
def functional(data):
    """Concatenate the functional-annotation rows of several samples.

    Args:
        data: mapping with the keys ``"pid"``, ``"uid"``, ``"sid"`` (an
            iterable of sample ids), ``"pip"`` and ``"rid"``.

    Returns:
        A three-element list: the string ``"function"``, the concatenation of
        ``view.all_func()`` over every requested sample, and the path of the
        ``<rid>.all_samples_tree.pk.json`` file under the project's RESULTS
        directory (the path is only returned, not written here).
    """
    pid = data["pid"]
    uid = data["uid"]  # not used below; the lookup still validates the input
    sids = data["sid"]
    pipeline = data["pip"]
    rid = data["rid"]
    FULL_MATRIX = []
    analysis = "function"

    results_dir = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/"
    # NOTE(review): shell command built by concatenation; pid/pipeline must
    # not contain shell metacharacters.
    os.system("mkdir " + results_dir + " >> " + rootvar.log + " 2>&1")
    all_samples_tree_file = results_dir + rid + ".all_samples_tree.pk"

    # The per-run log only ever receives this header line; write it and close
    # the file right away instead of leaving the handle open.
    with open(results_dir + rid + ".log", "w") as log:
        log.write("echo #LOG for Project ID: " + str(pid) + "\n")

    # NOTE: a cache lookup against all_samples_tree_file + ".db" used to live
    # here but was commented out; the matrix is rebuilt on every call.
    x = sql.SQL(main_db)
    try:
        for sid in sids:
            samples = x.exe('select * from samples where project_id="' + pid +
                            '" and sample_id="' + sid + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
            rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
            view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis,
                                             rf)
            FULL_MATRIX += view.all_func()
    finally:
        # Close the handle even when one of the samples cannot be loaded.
        x.close()
    return ["function", FULL_MATRIX, all_samples_tree_file + ".json"]
예제 #4
0
파일: run.py 프로젝트: barichd/MetaStorm
def get_childs_of_taxonomy():
    """Return abundance data for the children of one taxonomy node.

    Reads ``pid``, ``uid``, ``sid``, ``tid``, ``lid``, ``cond``, ``pip`` and
    ``rid`` from the JSON request body.

    * ``cond == "one"``: single-sample mode; answers with
      ``matrix=view.childs_of(tid, 'rpkm')``.
    * anything else: multi-sample mode; loads the merged tree pickle
      ``<rid>.all_samples_tree.pk``, collects the successors of ``tid`` and
      answers with ``data`` (first element of the matrix returned by
      ``rootvar.get_matrix_level_childs``) and ``heatmap='none'``.
    """
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]  # not used below; the lookup still validates the request
    sid = data["sid"]
    tid = data["tid"]
    lid = data["lid"]  # not used below; the lookup still validates the request
    cond = data["cond"]
    pip = data["pip"]
    rid = data["rid"]
    analysis = "taxonomy"

    if cond == "one":
        rf = rootvar.result_files(pid, analysis, pip, sid, rid)
        x = sql.SQL(main_db)
        try:
            # NOTE(review): the query is built by concatenating request
            # values; switch to bound parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid+'" and sample_id="'+sid+'"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            x.close()
        view = rootvar.ViewSampleResults(sample, pip, rid, analysis, rf)
        return jsonify(matrix=view.childs_of(tid, 'rpkm'))

    # Multi-sample mode: work on the merged tree of all samples.
    all_samples_tree_file = rootvar.__ROOTPRO__+"/" + \
        pid+"/"+pip+"/RESULTS/"+rid+".all_samples_tree.pk"
    G = nx.read_gpickle(all_samples_tree_file)
    children = list(G.successors(tid))

    x = sql.SQL(all_samples_tree_file+".db")
    try:
        matrix = rootvar.get_matrix_level_childs(x, children)
    finally:
        # This handle was previously never closed.
        x.close()

    samples_sel = list(set([str(row[0]) for row in matrix]))
    # The result is unused while the heatmap is disabled; the call is kept
    # because v2m is defined outside this view and may have side effects
    # (TODO confirm, then drop).
    rootvar.v2m(matrix, samples_sel, 0, 0)
    heatmap = 'none'  # heatmap=iclust.main(M,None)
    return jsonify(data=matrix[0], heatmap=heatmap)
예제 #5
0
파일: run.py 프로젝트: barichd/MetaStorm
def get_statistics():
    """Return per-program statistics for one sample as a JSON response.

    Reads ``pid``, ``uid``, ``sid`` and ``value`` (an iterable of program
    names) from the JSON request body.  For every program it calls
    ``statsP`` with the hard-coded database name ``"isuezrouja"`` and the
    pipeline ``"matches"``.

    Returns:
        ``jsonify(stats=[...], sname=<sample name>, pname=<project name>)``.
    """
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]  # not used below; the lookup still validates the request
    sid = data["sid"]
    programs = data["value"]
    dbname = "isuezrouja"
    pipeline = "matches"

    x = sql.SQL(main_db)
    try:
        # NOTE(review): the query is built by concatenating request values;
        # switch to bound parameters if sql.SQL.exe supports them.
        samples = x.exe('select * from samples where project_id="' +
                        pid+'" and sample_id="'+sid+'"')
        # One project lookup serves both the path (column 4) and the
        # project name (column 1).
        project = x.project(pid)[0]
        xpath = project[4]
        pname = str(project[1])
        sample = rootvar.samples(samples[0], xpath)
    finally:
        # Close the handle even if the project or sample is missing.
        x.close()

    stats = [statsP(sample, program, sid, dbname, pipeline)
             for program in programs]
    return jsonify(stats=stats, sname=sample.name, pname=pname)
예제 #6
0
파일: run.py 프로젝트: barichd/MetaStorm
def get_functional_counts():
    """Return the functional-annotation counts of one sample as JSON.

    Reads ``pid``, ``uid``, ``sid``, ``pip`` and ``rid`` from the JSON
    request body and answers with ``matrix=view.func_one_sample()`` and
    ``m2=view.func_structure(pid, pip, sid)``.

    Returns:
        The ``jsonify`` response on success, or the plain string
        ``"ERROR: <message>"`` when anything fails (nothing is raised).
    """
    try:
        data = request.get_json()
        pid = data["pid"]
        uid = data["uid"]  # not used below; kept so a missing key is still reported
        sid = data["sid"]
        pip = data["pip"]
        rid = data["rid"]
        analysis = "function"

        x = sql.SQL(main_db)
        try:
            rf = rootvar.result_files(pid, analysis, pip, sid, rid)
            # NOTE(review): the query is built by concatenating request
            # values; switch to bound parameters if sql.SQL.exe supports them.
            samples = x.exe('select * from samples where project_id="' +
                            pid+'" and sample_id="'+sid+'"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
        finally:
            # Release the handle even when the lookup fails; the outer
            # handler turns the error into a string, so nobody else would.
            x.close()

        view = rootvar.ViewSampleResults(sample, pip, rid, analysis, rf)
        return jsonify(matrix=view.func_one_sample(),
                       m2=view.func_structure(pid, pip, sid))
    except Exception as inst:
        return "ERROR: "+str(inst)
예제 #7
0
def idbaud(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the assembly pipeline for one sample and annotate its genes.

    Steps, each skipped when its output already exists:
    fq2fa -> idba_ud assembly -> prodigal gene finding, followed by the
    annotation branches selected by ``db`` (MetaPhlAn when ``db.name`` is
    ``"abcdefghij"``, MyTaxa when it is ``"MyTaxa"``, taxonomy when
    ``db.taxo`` is not ``"none"``, function when ``db.func`` is not
    ``"none"``).  Progress is reported through ``update_status``.

    Args:
        projectid: project id used in the ``samples`` table.
        sampleid: sample id used in the ``samples`` table.
        db: reference-database descriptor; the attributes ``id``, ``name``,
            ``taxo``, ``func``, ``len``, ``taxodb`` and ``funcdb`` are read.
        protocol: pipeline name forwarded to status updates and results.
        reads1, reads2: read-file values stored on the sample row before
            it is loaded.
        good_reads: forwarded unchanged to ``pb``.

    Returns:
        None.
    """
    x = sql.SQL(root.filedb())
    try:
        # 1. Project path.
        xpath = x.project(projectid)[0][4]

        # 2. Always refresh the read files stored for the sample, so a re-run
        #    picks up inputs that may have been modified.
        x.exe('update samples set reads1="'+reads1+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
        x.exe('update samples set reads2="'+reads2+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')

        # 3. Load the full sample record.
        samples = x.exe('select * from samples where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
        sample = root.samples(samples[0], xpath)
        root.mkdir(sample.assemblyDir)

        # 4.1 fq2fa: only needed when the assembly output does not exist yet.
        idba_ud = root.program('idba_ud', sample, db)
        update_status(x, sampleid, db.id, protocol, "Preprocessing")
        fq2fa = root.program('fq2fa', sample, db)
        if not root.isdir(idba_ud.out):
            fq2fa.run()

        # 4.2 idba_ud: assemble the sample, then drop intermediate files.
        update_status(x, sampleid, db.id, protocol, "Assembling")
        # Re-created after the fq2fa step, as in the original flow.
        idba_ud = root.program('idba_ud', sample, db)
        if not root.isdir(idba_ud.out):
            idba_ud.run()
            os.system(' cd ' + idba_ud.path + ' &&  rm kmer contig-* align-* graph-* local-contig-* reads.fa')

        # 4.3 Gene finder: predict genes on the scaffolds.
        prodigal = root.program("prodigal", sample, db)
        update_status(x, sampleid, db.id, protocol, "Finding Genes")
        if not root.isdir(prodigal.output+".gff"):
            prodigal.run()

        if db.name == "abcdefghij":
            # MetaPhlAn taxonomy profile.
            print("MetaPlAn2")
            update_status(x, sampleid, db.id, protocol, "Processing")
            metaphlan = root.program('MetaPhlAn', sample, db)
            if not root.isdir(metaphlan.out):
                metaphlan.run()
            G = txp.metaphlan_taxonomy_tree(metaphlan.out)
            abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", metaphlan.out)
            abn.start()
            update_status(x, sampleid, db.id, protocol, "Done")

        if db.name == 'MyTaxa':
            # MyTaxa: each stage is skipped when its output file exists.
            taxa = root.mytaxa(sample, db)
            update_status(x, sampleid, db.id, protocol, "Screening")
            if not root.isdir(taxa.output+".prot.mytaxa.fa"):
                taxa.pre()
            if not root.isdir(taxa.output+".MyTaxa.matches.daa"):
                taxa.align()
            if not root.isdir(taxa.output+".MyTaxa.align"):
                taxa.postd()
            if not root.isdir(taxa.output+".MyTaxa.input"):
                taxa.mpre()
            if not root.isdir(taxa.output+".MyTaxa.out"):
                taxa.run()
            update_status(x, sampleid, db.id, protocol, "Quantification")
            data = taxa.postM()
            G = txp.mytaxa_taxonomy_tree(data, taxa.output+".MyTaxa.matches.taxonomy.abundance")
            abn = root.SampleResults(sample, G, protocol, "MyTaxa", "taxonomy", taxa.output+".MyTaxa.matches")
            abn.start()
            update_status(x, sampleid, db.id, protocol, "Done")

        if not db.taxo == "none":
            print("taxonomy")
            # Align the predicted genes against the reference database.
            update_status(x, sampleid, db.id, protocol, "Screening")
            if db.name == "ryaetguxun":
                blastn = root.program('diamond_blastp', sample, db)
            else:
                blastn = root.program('blastn', sample, db)
            blastn.run()
            # Taxonomy abundance.
            update_status(x, sampleid, db.id, protocol, "Quantification")
            abundance = pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none", good_reads)
            # Build the tree and store the results for visualization.
            G = txp.taxonomy_tree(abundance, blastn.out, protocol, "taxonomy", db.name)
            abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy", blastn.out)
            abn.start()
            root.updateStatus(x, projectid, sampleid, "done")
            update_status(x, sampleid, db.id, protocol, "Done")

        if not db.func == "none":
            print("functional annotation")
            update_status(x, sampleid, db.id, protocol, "Screening")
            fileso = root.result_files(projectid, "function", protocol, sampleid, db.name)
            # Align the predicted genes with diamond blastp.
            root.updateStatus(x, projectid, sampleid, "functional annotation")
            blastn = root.program('diamond_blastp', sample, db)
            blastn.run()
            # Functional abundance.
            update_status(x, sampleid, db.id, protocol, "Quantification")
            abundance = pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes+".rpkm", good_reads)
            # Store the results for visualization.
            abn = root.SampleResults(sample, 'none', protocol, db.name, "function", blastn.out)
            abn.createFuncDb(abundance)
            update_status(x, sampleid, db.id, protocol, "Done")
    finally:
        # The handle used to stay open on every path; close it whether the
        # pipeline finished or a step raised.
        x.close()
예제 #8
0
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the read-matching pipeline for one sample.

    Branches, selected by ``db``:

    * ``db.name == "abcdefghij"``: MetaPhlAn taxonomy profile.
    * ``db.taxo != "none"``: bowtie2 alignment of the paired reads followed
      by taxonomy quantification; returns ``'success'`` right away, so the
      functional branch is NOT run for such a database.
    * ``db.func != "none"``: merge paired ends, convert to FASTA, run
      diamond blastx and quantify functions; returns ``'success'``.

    Every external step is skipped when its output already exists.

    Args:
        projectid: project id used in the ``samples`` table.
        sampleid: sample id used in the ``samples`` table.
        db: reference-database descriptor; the attributes ``id``, ``name``,
            ``taxo``, ``func``, ``bowtie``, ``diamond``, ``len`` and
            ``funcdb`` are read.
        protocol: pipeline name forwarded to status updates and results.
        reads1, reads2: read-file values stored on the sample row before
            it is loaded.
        good_reads: forwarded unchanged to ``parse_sam`` / ``pb``.

    Returns:
        ``'success'`` after the taxonomy or functional branch, otherwise
        ``None``.
    """
    # NOTE(review): ``p`` (used in the command lines below) is not defined in
    # this function; presumably a module-level thread-count string -- confirm.
    x = sql.SQL(root.filedb())
    try:
        xpath = x.project(projectid)[0][4]
        x.exe('update samples set reads1="' + reads1 +
              '" where project_id="' + projectid + '" and sample_id="' +
              sampleid + '"')
        x.exe('update samples set reads2="' + reads2 +
              '" where project_id="' + projectid + '" and sample_id="' +
              sampleid + '"')
        samples = x.exe('select * from samples where project_id="' +
                        projectid + '" and sample_id="' + sampleid + '"')
        sample = root.samples(samples[0], xpath)
        root.mkdir(sample.matchesDir)

        # Common prefix of every alignment artefact of this sample/database.
        alignment = sample.matchesDir + '/alignment.' + db.id
        matches = alignment + '.matches'

        if db.name == "abcdefghij":
            update_status(x, sampleid, db.id, protocol, "Processing")
            metaphlan = root.program('MetaPhlAnR', sample, db)
            if not root.isdir(metaphlan.out):
                metaphlan.run()
            G = txp.metaphlan_taxonomy_tree(metaphlan.out)
            abn = root.SampleResults(sample, G, protocol, db.name,
                                     "taxonomy", metaphlan.out)
            abn.start()
            update_status(x, sampleid, db.id, protocol, "Done")

        if not db.taxo == "none":
            # Align the paired-end reads with bowtie2.
            update_status(x, sampleid, db.id, 'matches', "Screening")
            cmd = " ".join([
                root.__ROOTEXEDIR__ + 'bowtie2',
                '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
                db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
                matches + ' >>',
                root.log, '2>&1'
            ])
            if not root.isdir(matches):
                os.system(cmd)
            # Parse the SAM output into per-gene read counts.
            update_status(x, sampleid, db.id, protocol, "Quantification")
            if not root.isdir(
                    matches + '.taxonomy.abundance.results.sqlite3.db'):
                abundance = parse_sam(matches, db, good_reads)
                G = txp.taxonomy_tree(abundance, matches, protocol,
                                      "taxonomy", db.id)
                # Store the data in the SQL table.
                abn = root.SampleResults(sample, G, protocol, db.id,
                                         "taxonomy", matches)
                abn.start()
            update_status(x, sampleid, db.id, protocol, "Done")
            return 'success'

        if not db.func == "none":
            fileso = root.result_files(projectid, "function", protocol,
                                       sampleid, db.name)
            merged_fastq = sample.matchesDir + '/merged.reads.fastq'
            merged_fasta = sample.matchesDir + '/merged.reads.fasta'

            # Merge the paired ends.
            update_status(x, sampleid, db.id, protocol, "Merge")
            cmd = " ".join([
                'python',
                root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p +
                " -m 8 -o ",
                merged_fastq, sample.reads1, sample.reads2
            ])
            root.flog(cmd)
            if not root.isdir(merged_fastq):
                os.system(cmd)

            # Convert the merged reads to FASTA.
            cmd = ' '.join([
                root.__ROOTEXEDIR__ + '/seqtk seq -a',
                merged_fastq + ' >',
                merged_fasta
            ])
            if not root.isdir(merged_fasta):
                os.system(cmd)

            # diamond blastx.
            update_status(x, sampleid, db.id, protocol, "Screening")
            # Fix: the prefix used to be built without the '/' separator,
            # unlike every other path in this function, so the outputs did
            # not line up with the result check further down.
            dout = alignment
            din = merged_fasta
            cmd = ' '.join([
                root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p +
                ' -k 1 -e 1e-5 -d', db.diamond, '-a', dout + '.pre', '-q',
                din, '>>', root.log, "2>&1"
            ])
            # Fix: the archive is written as <dout>.pre.daa (the file the
            # "diamond view" command below reads), so test that name instead
            # of <dout>.daa, which is never created.
            if not root.isdir(dout + '.pre.daa'):
                os.system(cmd)
            cmd = ' '.join([
                root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa',
                '-o', dout + '.matches -f tab', ">>", root.log, "2>&1"
            ])
            if not root.isdir(dout + '.matches'):
                os.system(cmd)

            # Parse the diamond output.
            update_status(x, sampleid, db.id, protocol, "Quantification")
            if not root.isdir(
                    matches + '.function.abundance.results.sqlite3.db'):
                abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                               "function", db.name, fileso.GGenes + ".rpkm",
                               good_reads)
                abn = root.SampleResults(sample, 'none', protocol, db.name,
                                         "function", dout + '.matches')
                abn.createFuncDb(abundance)
            update_status(x, sampleid, db.id, protocol, "Done")

            # Remove the intermediate merged reads.
            os.system('rm ' + merged_fastq + ' >> ' + root.log + " 2>&1")
            os.system('rm ' + merged_fasta + ' >> ' + root.log + " 2>&1")
            return 'success'
    finally:
        # The handle used to stay open on every path.
        x.close()
예제 #9
0
def taxonomy(data):
    """Build (or load) the merged taxonomy tree of a project's samples.

    The sample list is NOT taken from ``data["sid"]``: it is re-read from the
    database as every sample of the project whose status is ``"Done"`` for
    the given pipeline and reference id.

    Args:
        data: mapping with the keys ``"pid"``, ``"uid"``, ``"sid"``,
            ``"pip"`` and ``"rid"``.

    Returns:
        ``["taxonomy", tree]`` when the stored merged tree already covers all
        finished samples, otherwise
        ``["taxonomy", tree, <all_samples_tree_file>.json]`` after rebuilding
        the tree and writing the pickle and the full-matrix database.
    """
    pid = data["pid"]
    uid = data["uid"]  # not used below; the lookup still validates the input
    sids = data["sid"]  # replaced by the database query below
    pipeline = data["pip"]
    rid = data["rid"]
    edges = []
    FULL_MATRIX = []
    analysis = "taxonomy"

    rootvar.mkdir(rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline +
                  "/RESULTS/")
    all_samples_tree_file = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/" + rid + ".all_samples_tree.pk"

    # How many samples does the stored merged matrix already contain?
    stored_samples = 0
    have_cache = os.path.isfile(all_samples_tree_file)
    if have_cache:
        cache_db = sql.SQL(all_samples_tree_file + ".db")
        try:
            stored_samples = len(
                cache_db.exe("select distinct sample_name from full_matrix"))
        finally:
            # This handle used to be overwritten without being closed.
            cache_db.close()

    x = sql.SQL(main_db)
    try:
        sids = x.exe(
            'select c.sample_id from (select * from samples a inner join (select * from sample_status where pip="'
            + str(pipeline) +
            '") b on a.sample_id==b.sid) c where c.project_id=="' + pid +
            '" and c.status="Done" and c.rid="' + rid + '"')

        if have_cache and stored_samples == len(sids):
            # The stored tree covers every finished sample: reuse it.
            G = nx.read_gpickle(all_samples_tree_file)
            tree = json_graph.tree_data(G, root='R')
            return ["taxonomy", tree]

        # Otherwise collect the matrix rows, edges and node levels of every
        # finished sample.
        nodes = {}
        for sid in sids:
            samples = x.exe('select * from samples where project_id="' +
                            pid + '" and sample_id="' + sid[0] + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
            rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
            view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis,
                                             rf)
            FULL_MATRIX += view.all()
            Gp = nx.read_gpickle(rf.pk)
            edges += Gp.edges()
            for node in Gp.nodes():
                if node not in nodes:
                    nodes[node] = Gp.node[node]['level']
    finally:
        # Also covers the early return and any error above.
        x.close()

    rootvar.full_matrix_sql(all_samples_tree_file + ".db", FULL_MATRIX)

    # Merge the per-sample edges into one graph; the 'samples' attribute of a
    # node is incremented each time an edge touching it is seen.
    G = nx.DiGraph()
    for parent, child in edges:
        # has_edge / "in G" are dictionary lookups; the previous
        # "in G.edges()" / "in G.nodes()" rebuilt lists for every edge.
        if G.has_edge(parent, child):
            G.node[parent]['samples'] += 1
            G.node[child]['samples'] += 1
            continue
        for name in (child, parent):
            if name in G:
                G.node[name]['samples'] += 1
            else:
                G.add_node(name, samples=1, level=nodes[name])
        # Keep the graph a tree: a node gets at most one parent.  list()
        # makes the emptiness test valid whether predecessors() returns a
        # list or an iterator.
        if not list(G.predecessors(child)):
            G.add_edge(parent, child)

    tree = json_graph.tree_data(G, root='R')
    nx.write_gpickle(G, all_samples_tree_file)
    return ["taxonomy", tree, all_samples_tree_file + ".json"]