Exemple #1
0
def register_dataset():
    """Register a new reference dataset and return its generated id.

    Reads a JSON body from the current request with the keys ``rname``,
    ``rtype``, ``rdesc`` and ``uid``, generates a 10-character id with
    ``rootvar.get_alphabet``, calls ``rootvar.mkdir`` on
    ``rootvar.__ROOTDBS__ + rid`` and inserts one row into the ``reference``
    table.

    Returns:
        ``jsonify(rid=rid)`` on success, the plain string
        ``'User not allowed'`` for the one hard-coded uid, or
        ``"ERROR: <message>"`` when any step raises.
    """
    try:
        # Example payload:
        # {'rname':'test', 'rtype':'prot', 'rdesc':'test', 'uid':'test'}
        payload = request.get_json()
        rname, rtype, rdesc, uid = [
            payload[key] for key in ("rname", "rtype", "rdesc", "uid")
        ]
        if uid == "TesREPDooc73Ohw":
            return 'User not allowed'

        rid = rootvar.get_alphabet(10)
        # The same path is stored twice in the row: once as the dataset
        # path and once as the formatted-reference path.
        rpath = ref_format_path = rootvar.__ROOTDBS__ + rid
        rootvar.mkdir(rpath)

        row = (rid, rname, rtype, rdesc, rpath, uid,
               "none", "none", "none", "created", ref_format_path)
        g.db.execute(
            'INSERT INTO reference VALUES (?,?,?,?,?,?,?,?,?,?,?)', row)
        g.db.commit()
        return jsonify(rid=rid)
    except Exception as inst:
        # Every failure is reported to the caller as a plain string.
        return "ERROR: " + str(inst)
Exemple #2
0
def upload(file, dir):
    """Save an uploaded file into ``dir``.

    Sets ``app.config['UPLOAD_FOLDER']`` to ``dir`` and calls
    ``rootvar.mkdir(dir)``. When ``file`` is truthy it is saved inside that
    folder under the name returned by ``secure_filename(file.filename)``.
    Always returns ``None``.
    """
    app.config['UPLOAD_FOLDER'] = dir
    rootvar.mkdir(dir)
    if not file:
        return
    safe_name = secure_filename(file.filename)
    target = os.path.join(app.config['UPLOAD_FOLDER'], safe_name)
    file.save(target)
Exemple #3
0
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    """Run the read-matching pipeline for one sample against one reference.

    Steps, as visible in this function:

    1. Store ``reads1``/``reads2`` on the sample's row in ``samples`` and
       reload that row as a ``root.samples`` object.
    2. If ``db.name == "abcdefghij"``, run the ``'MetaPhlAnR'`` program and
       store its taxonomy tree through ``root.SampleResults``.
    3. If ``db.taxo != "none"``, align the paired reads with bowtie2, parse
       the alignment with ``parse_sam``, store the taxonomy results and
       return ``'success'``.
    4. Otherwise, if ``db.func != "none"``, merge the read pairs, convert
       them to FASTA with seqtk, run ``diamond blastx`` / ``diamond view``,
       quantify with ``pb``, store the functional results, delete the merged
       read files and return ``'success'``.

    Progress is reported through ``update_status`` at each stage, and the
    external tools are launched with ``os.system``.

    Returns:
        ``'success'`` from the taxonomy or the function branch; ``None``
        (implicitly) when neither branch applies.

    NOTE(review): ``p`` (the ``-p`` value given to bowtie2, pairend_join.py
    and diamond) is not defined in this function; presumably a module-level
    string holding the thread count -- confirm.
    NOTE(review): the SQL statements and shell commands are built by string
    concatenation from the arguments; confirm all inputs are trusted.
    """
    #db=root.dataset(db)
    x = sql.SQL(root.filedb())
    # Column 4 of the first project row; handed to root.samples below
    # (presumably the project path -- confirm against the schema).
    xpath = x.project(projectid)[0][4]
    # Always overwrite the stored read paths with the ones given to this call.
    # The return values (val) are not used.
    val = x.exe('update samples set reads1="' + reads1 +
                '" where project_id="' + projectid + '" and sample_id="' +
                sampleid + '"')
    val = x.exe('update samples set reads2="' + reads2 +
                '" where project_id="' + projectid + '" and sample_id="' +
                sampleid + '"')
    # Reload the sample row and wrap it in the project's sample object.
    samples = x.exe('select * from samples where project_id="' + projectid +
                    '" and sample_id="' + sampleid + '"')
    sample = samples[0]
    sample = root.samples(sample, xpath)
    root.mkdir(sample.matchesDir)
    # NOTE(review): rdir is never read in this function.
    rdir = root.__ROOTPRO__ + "/" + projectid + "/READS/"

    # Special-cased reference name handled by the 'MetaPhlAnR' program.
    if db.name == "abcdefghij":
        #print "MetaPhlAnn"
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        # Only run when root.isdir() is false for the output
        # (presumably a "result already exists" check -- confirm).
        if not root.isdir(metaphlan.out): metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy",
                                 metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    # Taxonomy branch: this branch returns, so the function branch below is
    # only reached when db.taxo == "none".
    if not db.taxo == "none":
        # run bowtie using the paired end reads
        # NOTE(review): this status is written under 'matches' rather than
        # `protocol`, unlike every other update_status call here -- confirm.
        update_status(x, sampleid, db.id, 'matches', "Screening")
        # Shell command; stdout and stderr are appended to root.log.
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
            sample.matchesDir + '/alignment.' + db.id + '.matches >>',
            root.log, '2>&1'
        ])
        # Skip the alignment when the .matches output is already reported
        # as present by root.isdir().
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches'):
            os.system(cmd)
        #process output in sam format to get genes and number of reads per gene.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        # Skip quantification when the results sqlite file is already present.
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(
                sample.matchesDir + '/alignment.' + db.id + '.matches', db,
                good_reads)
            G = txp.taxonomy_tree(
                abundance,
                sample.matchesDir + '/alignment.' + db.id + '.matches',
                protocol, "taxonomy", db.id)
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy",
                                     sample.matchesDir + '/alignment.' +
                                     db.id +
                                     '.matches')  # Store data in the sql TABLE
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        return 'success'
    # Function branch.
    if not db.func == "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid,
                                   db.name)
        #Merge paired ends
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            sample.matchesDir + '/merged.reads.fastq', sample.reads1,
            sample.reads2
        ])
        #print cmd
        root.flog(cmd)  #print cmd
        if not root.isdir(sample.matchesDir + '/merged.reads.fastq'):
            os.system(cmd)
        #Get fasta files
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a',
            sample.matchesDir + '/merged.reads.fastq >',
            sample.matchesDir + '/merged.reads.fasta'
        ])
        if not root.isdir(sample.matchesDir + '/merged.reads.fasta'):
            os.system(cmd)
        #BlastX from diamond
        update_status(x, sampleid, db.id, protocol, "Screening")
        # NOTE(review): unlike every other path in this function, dout has
        # no '/' between matchesDir and 'alignment.' -- confirm whether
        # matchesDir ends with a slash.
        dout = sample.matchesDir + 'alignment.' + db.id
        din = sample.matchesDir + '/merged.reads.fasta'
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p +
            ' -k 1 -e 1e-5 -d', db.diamond, '-a', dout + '.pre', '-q', din,
            '>>', root.log, "2>&1"
        ])
        # NOTE(review): the check looks at dout + '.daa' but the next command
        # reads dout + '.pre.daa'; the skip may never trigger -- confirm.
        if not root.isdir(dout + '.daa'): os.system(cmd)
        # Convert the diamond archive into a tabular .matches file.
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa', '-o',
            dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'): os.system(cmd)
        # parse diamond output
        update_status(x, sampleid, db.id, protocol, "Quantification")
        # This check uses the '/alignment.' spelling, whereas dout above does
        # not include the slash.
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                           "function", db.name, fileso.GGenes + ".rpkm",
                           good_reads)
            #abundance=pdx(dout+'.matches', db, good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name,
                                     "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")
        # Remove the merged read files; rm output is appended to root.log.
        os.system('rm ' + sample.matchesDir + '/merged.reads.fastq >> ' +
                  root.log + " 2>&1")
        os.system('rm ' + sample.matchesDir + '/merged.reads.fasta >> ' +
                  root.log + " 2>&1")
        return 'success'
Exemple #4
0
def idbaud(projectid,sampleid,db,protocol,reads1, reads2, good_reads):
    """Assemble one sample and annotate the assembly against one reference.

    Steps, as visible in this function:

    1. Store ``reads1``/``reads2`` on the sample's row in ``samples`` and
       reload that row as a ``root.samples`` object.
    2. Run the ``fq2fa``, ``idba_ud`` and ``prodigal`` programs, each one
       skipped when ``root.isdir`` is true for the corresponding output.
    3. Depending on ``db``:
       * ``db.name == "abcdefghij"``: run ``'MetaPhlAn'`` and store its
         taxonomy tree.
       * ``db.name == 'MyTaxa'``: run the ``root.mytaxa`` steps and store the
         resulting taxonomy tree.
       * ``db.taxo != "none"``: run ``blastn`` (or ``diamond_blastp`` for
         ``db.name == "ryaetguxun"``), quantify with ``pb`` and store the
         taxonomy tree.
       * ``db.func != "none"``: run ``diamond_blastp``, quantify with ``pb``
         and store the functional abundance.

    These branches are independent ``if`` statements, so more than one can
    run in a single call. Progress is reported through ``update_status``.
    The function has no ``return`` statement and therefore returns ``None``.

    NOTE(review): this block uses Python 2 ``print`` statements.
    NOTE(review): the SQL statements are built by string concatenation from
    the arguments; confirm all inputs are trusted.
    """
    #db=root.dataset(db)
    # 1. Get the project path.
    x=sql.SQL(root.filedb())
    xpath=x.project(projectid)[0][4]
    ###########################################################################
    # 2. Store the read files on the sample row. This is done on every call
    #    so that a re-run picks up new or modified inputs.
    ###########################################################################
    val=x.exe('update samples set reads1="'+reads1+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    val=x.exe('update samples set reads2="'+reads2+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    ###########################################################################
    # 3. Load the full sample record into the samples class.
    ###########################################################################
    samples=x.exe('select * from samples where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    sample=samples[0]
    sample=root.samples(sample,xpath)
    root.mkdir(sample.assemblyDir)
    ###########################################################################
    # 4.1 Run fq2fa - its output is used by the idba_ud program.
    ###########################################################################
    idba_ud=root.program('idba_ud',sample,db)
    update_status(x,sampleid,db.id,protocol,"Preprocessing")
    fq2fa=root.program('fq2fa', sample,db)
    # The check is on the assembly output (idba_ud.out), not on fq2fa's own
    # output: when the assembly is missing, both the fastq-to-fasta
    # conversion and the assembly are computed again.
    if not root.isdir(idba_ud.out): fq2fa.run()

    ###########################################################################
    # 4.2 Run idba_ud - assemble the sample.
    ###########################################################################
    update_status(x,sampleid,db.id,protocol,"Assembling")
    # NOTE(review): idba_ud is constructed a second time here with the same
    # arguments as above.
    idba_ud=root.program('idba_ud',sample,db)
    if not root.isdir(idba_ud.out):
        idba_ud.run();
        # Delete the listed intermediate files from the assembly directory.
        os.system(' cd ' + idba_ud.path + ' &&  rm kmer contig-* align-* graph-* local-contig-* reads.fa')

    ###########################################################################
    # 4.3 Run the gene finder (prodigal) over the scaffolds.
    ###########################################################################
    prodigal=root.program("prodigal", sample,db)
    update_status(x,sampleid,db.id,protocol,"Finding Genes")
    if not root.isdir(prodigal.output+".gff"): prodigal.run()

    # Special-cased reference name handled by the 'MetaPhlAn' program.
    if db.name=="abcdefghij":
        print "MetaPlAn2"
        update_status(x,sampleid,db.id,protocol,"Processing")
        metaphlan=root.program('MetaPhlAn',sample,db)
        if not root.isdir(metaphlan.out): metaphlan.run()
        G=txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")

    # MyTaxa: each step is skipped when its output file is already reported
    # as present by root.isdir().
    if db.name=='MyTaxa':
        taxa=root.mytaxa(sample,db)
        update_status(x,sampleid,db.id,protocol,"Screening")
        if not root.isdir(taxa.output+".prot.mytaxa.fa"): taxa.pre()
        if not root.isdir(taxa.output+".MyTaxa.matches.daa"): taxa.align()
        if not root.isdir(taxa.output+".MyTaxa.align"): taxa.postd()
        if not root.isdir(taxa.output+".MyTaxa.input"): taxa.mpre()
        if not root.isdir(taxa.output+".MyTaxa.out"): taxa.run()
        update_status(x,sampleid,db.id,protocol,"Quantification")
        data=taxa.postM()
        G=txp.mytaxa_taxonomy_tree(data,taxa.output+".MyTaxa.matches.taxonomy.abundance")
        abn=root.SampleResults(sample,G,protocol, "MyTaxa", "taxonomy", taxa.output+".MyTaxa.matches")
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")

    if not db.taxo=="none":
        print "taxonomy"
        ###########################################################################
        # 5.1 Search the reference for matches: diamond_blastp for the
        #     "ryaetguxun" reference, blastn for every other one.
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Screening")
        if db.name=="ryaetguxun":
            blastn=root.program('diamond_blastp',sample,db)
        else:
            blastn=root.program('blastn',sample,db)
        # Unlike the steps above, the search is always run (no output check).
        blastn.run()
        ###########################################################################
        # 5.2 Taxonomy abundance.
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none",good_reads)
        ###########################################################################
        # 5.3 Build the tree and store the results for visualization.
        ###########################################################################
        G=txp.taxonomy_tree(abundance,blastn.out, protocol, "taxonomy", db.name )
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", blastn.out)
        abn.start()
        root.updateStatus(x,projectid,sampleid,"done")
        update_status(x,sampleid,db.id,protocol,"Done")
    if not db.func=="none":
        print "functional annotation"
        update_status(x,sampleid,db.id,protocol,"Screening")
        fileso=root.result_files(projectid, "function", protocol, sampleid, db.name)
        ###########################################################################
        # 6.1 Search the reference for matches with diamond_blastp.
        ###########################################################################
        root.updateStatus(x,projectid,sampleid,"functional annotation")
        blastn=root.program('diamond_blastp',sample,db)
        blastn.run()
        ###########################################################################
        # 6.2 Function abundance.
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes+".rpkm", good_reads)
        ###########################################################################
        # 6.3 Store the functional results (no tree: 'none' is passed).
        ###########################################################################
        abn=root.SampleResults(sample,'none',protocol, db.name, "function", blastn.out)
        abn.createFuncDb(abundance)
        update_status(x,sampleid,db.id,protocol,"Done")
def taxonomy(data):
    """Build, or load from cache, the merged taxonomy tree of a project.

    The tree combines the per-sample taxonomy graphs of every sample of
    project ``pid`` whose status is ``"Done"`` for pipeline ``pip`` and
    reference ``rid``. The merged graph is cached as a gpickle file under
    ``<ROOTPRO>/<pid>/<pip>/RESULTS/`` next to a ``.db`` file holding the
    combined abundance matrix. The cache is reused when the number of
    distinct samples stored in that ``.db`` equals the number of finished
    samples.

    Args:
        data: mapping providing the keys ``"pid"`` (project id), ``"pip"``
            (pipeline name) and ``"rid"`` (reference id). Other keys are
            ignored.

    Returns:
        ``["taxonomy", tree]`` when the cached graph is reused, otherwise
        ``["taxonomy", tree, <cache file> + ".json"]``. ``tree`` is the
        output of ``json_graph.tree_data`` rooted at node ``'R'``. The
        ``.json`` path is only returned; this function does not write it.

    NOTE(review): the sample query is built by string concatenation from
    ``data``; confirm the values are trusted or move to bound parameters
    if ``x.exe`` supports them.
    """
    pid = data["pid"]
    pipeline = data["pip"]
    rid = data["rid"]
    analysis = "taxonomy"

    results_dir = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/"
    rootvar.mkdir(results_dir)
    all_samples_tree_file = results_dir + rid + ".all_samples_tree.pk"

    # Count the samples already stored alongside a previously cached tree.
    cache_exists = os.path.isfile(all_samples_tree_file)
    stored_samples = 0
    if cache_exists:
        cache_db = sql.SQL(all_samples_tree_file + ".db")
        try:
            stored_samples = len(
                cache_db.exe("select distinct sample_name from full_matrix"))
        finally:
            # This handle used to be rebound without ever being closed.
            cache_db.close()

    edges = []
    FULL_MATRIX = []
    nodes = {}
    x = sql.SQL(main_db)
    try:
        sids = x.exe(
            'select c.sample_id from (select * from samples a inner join (select * from sample_status where pip="'
            + str(pipeline) +
            '") b on a.sample_id==b.sid) c where c.project_id=="' + pid +
            '" and c.status="Done" and c.rid="' + rid + '"')

        # Cache hit: the stored matrix covers every finished sample.
        if cache_exists and stored_samples == len(sids):
            G = nx.read_gpickle(all_samples_tree_file)
            tree = json_graph.tree_data(G, root='R')
            return ["taxonomy", tree]

        # Cache miss: collect abundance rows, edges and node levels from
        # every finished sample.
        for sid in sids:
            samples = x.exe('select * from samples where project_id="' + pid +
                            '" and sample_id="' + sid[0] + '"')
            xpath = x.project(pid)[0][4]
            sample = rootvar.samples(samples[0], xpath)
            rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
            view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis,
                                             rf)
            FULL_MATRIX += view.all()
            Gp = nx.read_gpickle(rf.pk)
            edges += Gp.edges()
            for node in Gp.nodes():
                if node not in nodes:
                    nodes[node] = Gp.node[node]['level']
    finally:
        # Close the main DB on every path, including the cached early return.
        x.close()

    rootvar.full_matrix_sql(all_samples_tree_file + ".db", FULL_MATRIX)

    # Merge all sample edges into one graph. Each node's 'samples' attribute
    # counts how many collected edges touched it.
    G = nx.DiGraph()
    for parent, child in edges:
        if G.has_edge(parent, child):
            G.node[parent]['samples'] += 1
            G.node[child]['samples'] += 1
            continue
        # Child first, then parent, matching the original update order.
        for name in (child, parent):
            if name in G:
                G.node[name]['samples'] += 1
            else:
                G.add_node(name, samples=1, level=nodes[name])
        # Each node keeps a single parent so the result stays a tree.
        # list() keeps the emptiness test valid whether predecessors()
        # returns a list or an iterator.
        if not list(G.predecessors(child)):
            G.add_edge(parent, child)

    tree = json_graph.tree_data(G, root='R')
    nx.write_gpickle(G, all_samples_tree_file)
    return ["taxonomy", tree, all_samples_tree_file + ".json"]