def functional(data):
    pid = data["pid"]
    uid = data["uid"]
    sids = data["sid"]
    pipeline = data["pip"]
    rid = data["rid"]
    FULL_MATRIX = []
    #first see if the data set contains taxonomy, function or both annotations
    analysis = "function"
    #
    os.system("mkdir " + rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline +
              "/RESULTS/ >> " + rootvar.log + " 2>&1")
    all_samples_tree_file = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/" + rid + ".all_samples_tree.pk"
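    # cache pair: the pickled merged tree itself plus a companion "<name>.db"
    # SQLite file holding the merged abundance matrix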
    #
    stored_samples = 0
    if os.path.isfile(all_samples_tree_file):
        x = sql.SQL(all_samples_tree_file + ".db")
        val = x.exe("select distinct sample_name from full_matrix")
        stored_samples = len(val)
    #
    x = sql.SQL(main_db)
    # load all the samples that have been finalized to run
    sids = x.exe(
        'select c.sample_id from (select * from samples a inner join (select * from sample_status where pip="'
        + str(pipeline) +
        '") b on a.sample_id==b.sid) c where c.project_id=="' + pid +
        '" and c.status="Done" and c.rid="' + rid + '"')
    x.close()
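    # reuse the cached matrix only if it already holds every finalized sample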
    if os.path.isfile(all_samples_tree_file) and stored_samples == len(sids):
        x = sql.SQL(all_samples_tree_file + ".db")
        FULL_MATRIX = x.exe("select * from full_matrix")
        return ["function", FULL_MATRIX, all_samples_tree_file + ".json"]

    x = sql.SQL(main_db)

    for sid in sids:
        samples = x.exe('select * from samples where project_id="' + pid +
                        '" and sample_id="' + sid[0] + '"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
        rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
        view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis, rf)
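        # append this sample's functional abundance rows to the combined matrix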
        FULL_MATRIX += view.all_func()
    x.close()
    # NOTE: persisting the matrix below is disabled, so the cached-result
    # branch above can never succeed for functional data
    #rootvar.full_matrix_function(all_samples_tree_file+".db", FULL_MATRIX)
    return ["function", FULL_MATRIX, all_samples_tree_file + ".json"]
Example #2
def get_taxo_by_name():
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]
    sid = data["sid"]
    tid = data["tid"]
    lid = data["lid"]
    pip = data["pip"]
    dbname = data["rid"]
    pipeline = pip
    # dbname="isuezrouja"
    analysis = "taxonomy"
    x = sql.SQL(main_db)
    rf = rootvar.result_files(pid, analysis, pip, sid, dbname)
    samples = x.exe('select * from samples where project_id="' +
                    pid+'" and sample_id="'+sid+'"')
    xpath = x.project(pid)[0][4]
    sample = rootvar.samples(samples[0], xpath)
    x.close()
    view = rootvar.ViewSampleResults(sample, pipeline, dbname, analysis, rf)
    try:
        return jsonify(matrix=view.level(lid, type="matches"))
    except Exception as inst:
        return "ERROR: "+str(inst)
Example #3
def get_tree():
    try:
        data = request.get_json()
        pid = data["pid"]
        uid = data["uid"]
        sid = data["sid"]
        selval = data["value"]
        pipeline = data["pip"]
        dbname = data["rid"]
        # dbname="isuezrouja"
        analysis = "taxonomy"
        rf = rootvar.result_files(pid, analysis, pipeline, sid, dbname)
        x = sql.SQL(main_db)
        samples = x.exe('select * from samples where project_id="' +
                        pid+'" and sample_id="'+sid+'"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
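        # the per-sample taxonomy tree was serialized to JSON by the pipeline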
        with open(rf.json) as data_file:
            data = json.load(data_file)
        x.close()
        view = rootvar.ViewSampleResults(
            sample, pipeline, dbname, analysis, rf)
        return jsonify(tree=[data], range=view.range(selval)[0], pip=pipeline)
    except Exception as inst:
        return "ERROR: "+str(inst)
Example #4
def get_childs_of_taxonomy():
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]
    sid = data["sid"]
    tid = data["tid"]
    lid = data["lid"]
    cond = data["cond"]
    pip = data["pip"]
    rid = data["rid"]

    dbname = data["rid"]
    analysis = "taxonomy"


    if cond == "one":  # this is if I am analyzing one or multiple samples
        rf = rootvar.result_files(pid, analysis, pip, sid, dbname)
        x = sql.SQL(main_db)
        samples = x.exe('select * from samples where project_id="' +
                        pid+'" and sample_id="'+sid+'"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
        x.close()
        view = rootvar.ViewSampleResults(sample, pip, dbname, analysis, rf)
        matrix = view.childs_of(tid, 'rpkm')
        return jsonify(matrix=matrix)
    else:
        # load big tree
        all_samples_tree_file = rootvar.__ROOTPRO__+"/" + \
            pid+"/"+pip+"/RESULTS/"+rid+".all_samples_tree.pk"
        G = nx.read_gpickle(all_samples_tree_file)
        data = list(G.successors(tid))  # direct children of the selected taxon
        x = sql.SQL(all_samples_tree_file+".db")

        matrix = rootvar.get_matrix_level_childs(x, data)
        samples_sel = list(set([str(i[0]) for i in matrix]))
        M, N = rootvar.v2m(matrix, samples_sel, 0, 0)
        heatmap = 'none'  # heatmap=iclust.main(M,None)
        return jsonify(data=matrix[0], heatmap=heatmap)
Example #5
def get_statistics():
    data = request.get_json()
    pid = data["pid"]
    uid = data["uid"]
    sid = data["sid"]
    programs = data["value"]
    dbname = "isuezrouja"

    x = sql.SQL(main_db)
    samples = x.exe('select * from samples where project_id="' +
                    pid+'" and sample_id="'+sid+'"')
    xpath = x.project(pid)[0][4]
    pname = str(x.project(pid)[0][1])
    sample = rootvar.samples(samples[0], xpath)
    x.close()
    data = []
    pipeline = "matches"
    for program in programs:
        data.append(statsP(sample, program, sid, dbname, pipeline))
    return jsonify(stats=data, sname=sample.name, pname=pname)
Example #6
def get_functional_counts():
    try:
        data = request.get_json()
        pid = data["pid"]
        uid = data["uid"]
        sid = data["sid"]
        pip = data["pip"]
        rid = data["rid"]
        analysis = "function"
        # lid="assembly"
        # file=rootvar.__ROOTPRO__+"/"+pid+"/assembly/idba_ud/"+sid+"/pred.genes."+rid+".matches.function.abundance.results.sqlite3.db"
        x = sql.SQL(main_db)
        rf = rootvar.result_files(pid, "function", pip, sid, rid)
        samples = x.exe('select * from samples where project_id="' +
                        pid+'" and sample_id="'+sid+'"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
        x.close()
        view = rootvar.ViewSampleResults(sample, pip, rid, analysis, rf)
        return jsonify(matrix=view.func_one_sample(), m2=view.func_structure(pid, pip, sid))
    except Exception as inst:
        return "ERROR: "+str(inst)
def run(data):
    rid = data['rid']
    x = sql.SQL(main_db)
    value = x.exe(
        'select taxofile,functfile from reference where reference_id="' + rid +
        '"')[0]
    x.close()
    if rid == "MyTaxa" or rid == "abcdefghij":
        taxo = taxonomy(data)
        return taxo

    if value[0] != "none":
        taxo = taxonomy(data)
    if value[1] != "none":
        func = functional(data)

    if value[0] != "none" and value[1] != "none":
        return [taxo, func]
    elif value[0] != "none":
        return taxo
    elif value[1] != "none":
        return func
Example #8
def run(data,main_db,DBS):
    project_id = str(data['pid'])
    pipeline = str(data['pip'])
    reads1 = str(data['read1'])
    reads2 = str(data['read2'])
    sample_id = str(data['sid'])
    user_id = str(data['uid'])
    refs = data["rids"]
    if not refs:
        return 'Error: No database has been selected'

    x = sql.SQL(main_db)
    x.exe('UPDATE project_status SET status="queue" WHERE project_id="'+project_id+'" AND sample_id="'+sample_id+'"')
    x.insert("sample_run", (sample_id, "created"))  # record that the uploaded files have been created
    x.commit()
    # preprocessing -- FIXME: need write permissions
    rdir = rootvar.__ROOTPRO__+"/"+project_id+"/READS/"
    trim = pre.trimmomatic(rdir+reads1, rdir+reads2, rdir, sample_id)
    #if not rootvar.isdir(trim.outd+sample_id+'.trim.log'): trim.run()
    trim.run()
    reads1 = reads1.replace(".gz", "")
    reads2 = reads2.replace(".gz", "")
    ##################################################################################################
    ##### Important: check whether the GreenGenes database has already been used to find 16S rRNAs.
    ##### If not, the GreenGenes database is forced to run first, for normalization purposes.
    ##################################################################################################
    if pipeline=="assembly": greengenes_file=rootvar.__ROOTPRO__+"/"+project_id+"/assembly/idba_ud/"+sample_id+"/pred.genes.Gbfbquhild.matches"
    if pipeline=="matches": greengenes_file=rootvar.__ROOTPRO__+"/"+project_id+"/matches/"+sample_id+"/pred.genes.Gbfbquhild.matches"
    #_______________________________________________________________##################################
    #
    # Get the number of reads from the trimmomatic log
    raw_reads = good_reads = 0.0  # defaults in case the summary line is missing
    tfile = trim.outd+sample_id+'.trim.log'
    for i in open(tfile):
        if "Input Read Pairs:" in i:
            i = i.split()
            raw_reads = float(i[3])   # total input read pairs
            good_reads = float(i[6])  # high-quality reads surviving trimming and quality filtering
    print good_reads

    if pipeline == "matches":
        log = my_logger(logfile=rootvar.__ROOTPRO__+"/"+project_id+"/matches/"+sample_id+"/arc_run.qsub.log")
        log.info('running read match pipeline')
        for ref in refs:
            log.info('Processing %s with reference id: %s'%(sample_id, ref) )
            if not x.exe('select * from matches where sample_id="'+sample_id+'" and datasets="'+ref+'"'):
                x.c.execute('INSERT OR IGNORE INTO matches VALUES (?,?,?,?)', (sample_id,user_id,project_id,ref))
                x.commit()
            try:
                val = MP(project_id, sample_id, DBS[ref], "matches", reads1, reads2, good_reads)
            except Exception as e:
                log.error(str(e))
                log.error('Exception\t%s\t%s'%(sample_id, ref))
    #
    # this is the assembly section
    #
    if pipeline=="assembly":
        log = my_logger(logfile=rootvar.__ROOTPRO__ + "/" + project_id + "/assembly/idba_ud/" + sample_id + "/arc_run.qsub.log")
        log.info('running assembly pipeline')
        for ref in refs:
            log.info('Processing %s with reference id: %s'%(sample_id, ref) )
            if not x.exe('select * from assembly where sample_id="'+sample_id+'" and datasets="'+ref+'"'):
                x.c.execute('INSERT OR IGNORE INTO assembly VALUES (?,?,?,?)', (sample_id,user_id,project_id,ref))
                x.commit()
            try:
                idba(project_id, sample_id, DBS[ref], "assembly", reads1, reads2, good_reads)
            except Exception as e:
                log.error(str(e))
                log.error('Exception\t%s\t%s' % (sample_id, ref))

    x.close()
    # finally, remove the FASTQ files to save storage space; gzip already
    # deletes the original, so the rm that follows it is a harmless no-op
    if not rootvar.isdir(rdir+reads1+".gz"):
        os.system('gzip '+rdir+reads1+' >> '+rootvar.log+" 2>&1")
        os.system('rm '+rdir+reads1+' >> '+rootvar.log+" 2>&1")
    else:
        os.system('rm '+rdir+reads1+' >> '+rootvar.log+" 2>&1")

    if not rootvar.isdir(rdir+reads2+".gz"):
        os.system('gzip '+rdir+reads2+' >> '+rootvar.log+" 2>&1")
        os.system('rm '+rdir+reads2+' >> '+rootvar.log+" 2>&1")
    else:
        os.system('rm '+rdir+reads2+' >> '+rootvar.log+" 2>&1")

    return 'success'
Example #9
def RunMetaGen():
    try:
        data = request.get_json()
        refs = data["rids"]
        sid = data['sid']
        uid = data['uid']
        pip = data['pip']
        msg = data['msg']

        if uid == "TesREPDooc73Ohw":
            return 'User not allowed'
        S = query_db('select * from user  where user_id="'+uid+'"')
        T = query_db('select * from samples where sample_id="'+sid+'"')
        date = time.strftime("%m/%d/%Y")
        pid = T[0]['project_id']
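        # bundle the job arguments as base64-encoded JSON for the remote worker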
        arg = base64.b64encode(json.dumps(
            [data, refs, sid, uid, pip, rootvar.__FILEDB__, S, T]))
        SArc = bench2archu(
            'python /groups/metastorm_cscee/MetaStorm/process.py ' + arg)

        if pip == "assembly":
            do = "/groups/metastorm_cscee/MetaStorm/Files/PROJECTS/"+data['pid']+"/assembly/idba_ud/"+sid+"/arc_run.qsub.init"
        else:
            do = "/groups/metastorm_cscee/MetaStorm/Files/PROJECTS/"+data['pid']+"/matches/"+sid+"/arc_run.qsub.init"

        try:
            check_job = bench2archu('cat {}'.format(do))
            # the first line of the .init file is expected to look like "<job_id>.<host>.<...>"
            job_id, _, _ = check_job['out'].split('\n')[0].split('.')
            SArc.update({'job_id': job_id})
        except Exception:
            SArc.update({'job_id': 'NULL'})

        x = sql.SQL(rootvar.__FILEDB__)
        update_jobs(x, [uid, T[0]['project_id'], sid, pip, arg,
                        'queue', 'normal', date, SArc['out'].split(".")[0]])

        val = x.exe('update samples set reads1="' +
                    data['read1']+'" where project_id="'+data['pid']+'" and sample_id="'+sid+'"')
        val = x.exe('update samples set reads2="' +
                    data['read2']+'" where project_id="'+data['pid']+'" and sample_id="'+sid+'"')

        for ref in refs:
            update_status(x, sid, ref, pip, "queue")
            if not x.exe('select * from '+pip+' where sample_id="'+sid+'" and datasets="'+ref+'"'):
                x.c.execute('INSERT OR IGNORE INTO '+pip +
                            ' VALUES (?,?,?,?)', (sid, uid, pid, ref))
                x.commit()

        x.close()

        try:
            os.system("mkdir -p "+rootvar.__ROOTPRO__+"/" +
                      data['pid']+"/assembly/idba_ud/"+sid)
            os.system("mkdir -p "+rootvar.__ROOTPRO__ +
                      "/"+data['pid']+"/assembly/RESULTS/")
            os.system("mkdir -p "+rootvar.__ROOTPRO__ +
                      "/"+data['pid']+"/matches/"+sid)
            os.system("mkdir -p "+rootvar.__ROOTPRO__ +
                      "/"+data['pid']+"/matches/RESULTS/")
            os.system("mkdir -p "+rootvar.__ROOTPRO__+"/"+data['pid']+"/READS")
        except Exception:
            pass
        

        x = email.send_email(S[0]['user_name'], S[0]['user_affiliation'],
                             'Processing sample: '+T[0]['sample_name'], "Dear MetaStorm User, <br><br><br> the sample <b>" +
                             T[0]['sample_name'] +
                             '</b> has been submitted into the MetaStorm server. It will run the <b>' +
                             pip+'</b> pipeline <br><br>'
                             'The time for making the analysis depends on the current web traffic and availability of the web server. Once the ' +
                             'analysis is done you will receive a notification via email. <br><br><br><br>' +
                             'Thank you<br><b>MetaStorm Team</b>')

        return jsonify(SArc)
    except Exception as inst:
        return "ERROR: "+str(inst)
Example #10
def idbaud(projectid,sampleid,db,protocol,reads1, reads2, good_reads):
    #db=root.dataset(db)
    #1 get project path
    x=sql.SQL(root.filedb())
    xpath=x.project(projectid)[0][4]
    ###########################################################################
    #2 update the read paths in the sql database unconditionally, so a re-run
    #  picks up new input files if they changed
    ###########################################################################
    val=x.exe('update samples set reads1="'+reads1+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    val=x.exe('update samples set reads2="'+reads2+'" where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    ###########################################################################
    #3 get the sample full information - load the class samples
    ###########################################################################
    samples=x.exe('select * from samples where project_id="'+projectid+'" and sample_id="'+sampleid+'"')
    sample=samples[0]
    sample=root.samples(sample,xpath)
    root.mkdir(sample.assemblyDir)
    ###########################################################################
    # 4.1 Run fq2fa - idba_ud needs fasta input
    ###########################################################################
    idba_ud=root.program('idba_ud',sample,db)
    update_status(x,sampleid,db.id,protocol,"Preprocessing")
    fq2fa=root.program('fq2fa', sample,db)
    if not root.isdir(idba_ud.out): fq2fa.run() # ensure scaffold.fa exists; otherwise redo the fastq-to-fasta conversion and the assembly

    ###########################################################################
    # 4.2 Run idba_ud - assemble the sample
    ###########################################################################
    update_status(x,sampleid,db.id,protocol,"Assembling")
    idba_ud=root.program('idba_ud',sample,db)
    if not root.isdir(idba_ud.out):
        idba_ud.run()
        os.system(' cd ' + idba_ud.path + ' &&  rm kmer contig-* align-* graph-* local-contig-* reads.fa')

    ###########################################################################
    # 4.3 Run gene finder - predict genes on the scaffolds
    ###########################################################################
    prodigal=root.program("prodigal", sample,db)
    update_status(x,sampleid,db.id,protocol,"Finding Genes")
    if not root.isdir(prodigal.output+".gff"): prodigal.run()

    if db.name=="abcdefghij":
        print "MetaPlAn2"
        update_status(x,sampleid,db.id,protocol,"Processing")
        metaphlan=root.program('MetaPhlAn',sample,db)
        if not root.isdir(metaphlan.out): metaphlan.run()
        #print "Here 2"
        G=txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", metaphlan.out)
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")

    if db.name=='MyTaxa':
        taxa=root.mytaxa(sample,db)
        update_status(x,sampleid,db.id,protocol,"Screening")
        if not root.isdir(taxa.output+".prot.mytaxa.fa"): taxa.pre()
        if not root.isdir(taxa.output+".MyTaxa.matches.daa"): taxa.align()
        if not root.isdir(taxa.output+".MyTaxa.align"): taxa.postd()
        if not root.isdir(taxa.output+".MyTaxa.input"): taxa.mpre()
        if not root.isdir(taxa.output+".MyTaxa.out"): taxa.run()
        update_status(x,sampleid,db.id,protocol,"Quantification")
        data=taxa.postM()
        G=txp.mytaxa_taxonomy_tree(data,taxa.output+".MyTaxa.matches.taxonomy.abundance")
        abn=root.SampleResults(sample,G,protocol, "MyTaxa", "taxonomy", taxa.output+".MyTaxa.matches")
        abn.start()
        update_status(x,sampleid,db.id,protocol,"Done")

    if not db.taxo=="none":
        print "taxonomy"
        ###########################################################################
        # 4.3 Run bowtie to find matches
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Screening")
        if db.name=="ryaetguxun":
            blastn=root.program('diamond_blastp',sample,db)
        else:
            blastn=root.program('blastn',sample,db)
        blastn.run()
        ###########################################################################
        # 4.4 taxonomy abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.taxo, db.len, db.taxodb, "taxonomy", db.name, "none",good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        G=txp.taxonomy_tree(abundance,blastn.out, protocol, "taxonomy", db.name )
        abn=root.SampleResults(sample,G,protocol, db.name, "taxonomy", blastn.out)
        abn.start()
        root.updateStatus(x,projectid,sampleid,"done")
        update_status(x,sampleid,db.id,protocol,"Done")
    if not db.func=="none":
        print "functional annotation"
        update_status(x,sampleid,db.id,protocol,"Screening")
        fileso=root.result_files(projectid, "function", protocol, sampleid, db.name)
        ###########################################################################
        # 4.3 Run diamond blastp to find matches
        ###########################################################################
        root.updateStatus(x,projectid,sampleid,"functional annotation")
        blastn=root.program('diamond_blastp',sample,db)
        blastn.run()
        ###########################################################################
        # 4.4 functional abundance
        ###########################################################################
        update_status(x,sampleid,db.id,protocol,"Quantification")
        abundance=pb(blastn.out, db.func, db.len, db.funcdb, "function", db.name, fileso.GGenes+".rpkm", good_reads)
        ###########################################################################
        # 4.5 processing Visualization
        ###########################################################################
        abn=root.SampleResults(sample,'none',protocol, db.name, "function", blastn.out)
        abn.createFuncDb(abundance)
        update_status(x,sampleid,db.id,protocol,"Done")
Example #11
def process(projectid, sampleid, db, protocol, reads1, reads2, good_reads):
    #db=root.dataset(db)
    x = sql.SQL(root.filedb())
    xpath = x.project(projectid)[0][4]
    val = x.exe('update samples set reads1="' + reads1 +
                '" where project_id="' + projectid + '" and sample_id="' +
                sampleid + '"')
    val = x.exe('update samples set reads2="' + reads2 +
                '" where project_id="' + projectid + '" and sample_id="' +
                sampleid + '"')
    samples = x.exe('select * from samples where project_id="' + projectid +
                    '" and sample_id="' + sampleid + '"')
    sample = samples[0]
    sample = root.samples(sample, xpath)
    root.mkdir(sample.matchesDir)
    rdir = root.__ROOTPRO__ + "/" + projectid + "/READS/"

    if db.name == "abcdefghij":
        #print "MetaPhlAnn"
        update_status(x, sampleid, db.id, protocol, "Processing")
        metaphlan = root.program('MetaPhlAnR', sample, db)
        if not root.isdir(metaphlan.out): metaphlan.run()
        G = txp.metaphlan_taxonomy_tree(metaphlan.out)
        abn = root.SampleResults(sample, G, protocol, db.name, "taxonomy",
                                 metaphlan.out)
        abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")

    if not db.taxo == "none":
        # run bowtie using the paired end reads
        update_status(x, sampleid, db.id, 'matches', "Screening")
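        # p (thread count) comes from module scope and is not shown in this excerpt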
        cmd = " ".join([
            root.__ROOTEXEDIR__ + 'bowtie2',
            '--very-fast-local -p ' + p + ' --no-unal --no-hd --no-sq -x',
            db.bowtie, '-1', sample.reads1, '-2', sample.reads2, '-S',
            sample.matchesDir + '/alignment.' + db.id + '.matches >>',
            root.log, '2>&1'
        ])
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches'):
            os.system(cmd)
        #process output in sam format to get genes and number of reads per gene.
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches.taxonomy.abundance.results.sqlite3.db'):
            abundance = parse_sam(
                sample.matchesDir + '/alignment.' + db.id + '.matches', db,
                good_reads)
            G = txp.taxonomy_tree(
                abundance,
                sample.matchesDir + '/alignment.' + db.id + '.matches',
                protocol, "taxonomy", db.id)
            abn = root.SampleResults(sample, G, protocol, db.id, "taxonomy",
                                     sample.matchesDir + '/alignment.' +
                                     db.id +
                                     '.matches')  # Store data in the sql TABLE
            abn.start()
        update_status(x, sampleid, db.id, protocol, "Done")
        return 'success'
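    # NOTE: the early return above means this functional branch is skipped
    # whenever the reference also carries a taxonomy annotation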
    if not db.func == "none":
        fileso = root.result_files(projectid, "function", protocol, sampleid,
                                   db.name)
        #Merge paired ends
        update_status(x, sampleid, db.id, protocol, "Merge")
        cmd = " ".join([
            'python',
            root.__ROOTEXEDIR__ + "pairend_join.py -s -p " + p + " -m 8 -o ",
            sample.matchesDir + '/merged.reads.fastq', sample.reads1,
            sample.reads2
        ])
        root.flog(cmd)
        if not root.isdir(sample.matchesDir + '/merged.reads.fastq'):
            os.system(cmd)
        #Get fasta files
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/seqtk seq -a',
            sample.matchesDir + '/merged.reads.fastq >',
            sample.matchesDir + '/merged.reads.fasta'
        ])
        if not root.isdir(sample.matchesDir + '/merged.reads.fasta'):
            os.system(cmd)
        #BlastX from diamond
        update_status(x, sampleid, db.id, protocol, "Screening")
        dout = sample.matchesDir + '/alignment.' + db.id
        din = sample.matchesDir + '/merged.reads.fasta'
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond blastx --id 60 -p ' + p +
            ' -k 1 -e 1e-5 -d', db.diamond, '-a', dout + '.pre', '-q', din,
            '>>', root.log, "2>&1"
        ])
        if not root.isdir(dout + '.pre.daa'): os.system(cmd)
        cmd = ' '.join([
            root.__ROOTEXEDIR__ + '/diamond view -a', dout + '.pre.daa', '-o',
            dout + '.matches -f tab', ">>", root.log, "2>&1"
        ])
        if not root.isdir(dout + '.matches'): os.system(cmd)
        # parse diamond output
        update_status(x, sampleid, db.id, protocol, "Quantification")
        if not root.isdir(sample.matchesDir + '/alignment.' + db.id +
                          '.matches.function.abundance.results.sqlite3.db'):
            abundance = pb(dout + '.matches', db.func, db.len, db.funcdb,
                           "function", db.name, fileso.GGenes + ".rpkm",
                           good_reads)
            #abundance=pdx(dout+'.matches', db, good_reads)
            abn = root.SampleResults(sample, 'none', protocol, db.name,
                                     "function", dout + '.matches')
            abn.createFuncDb(abundance)
        update_status(x, sampleid, db.id, protocol, "Done")
        os.system('rm ' + sample.matchesDir + '/merged.reads.fastq >> ' +
                  root.log + " 2>&1")
        os.system('rm ' + sample.matchesDir + '/merged.reads.fasta >> ' +
                  root.log + " 2>&1")
        return 'success'
def taxonomy(data):
    pid = data["pid"]
    uid = data["uid"]
    sids = data["sid"]
    pipeline = data["pip"]
    rid = data["rid"]
    edges = []
    FULL_MATRIX = []
    #first see if the data set contains taxonomy, function or both annotations
    analysis = "taxonomy"
    rootvar.mkdir(rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline +
                  "/RESULTS/")
    all_samples_tree_file = rootvar.__ROOTPRO__ + "/" + pid + "/" + pipeline + "/RESULTS/" + rid + ".all_samples_tree.pk"
    #
    stored_samples = 0
    if os.path.isfile(all_samples_tree_file):
        x = sql.SQL(all_samples_tree_file + ".db")
        val = x.exe("select distinct sample_name from full_matrix")
        stored_samples = len(val)
    #
    x = sql.SQL(main_db)
    sids = x.exe(
        'select c.sample_id from (select * from samples a inner join (select * from sample_status where pip="'
        + str(pipeline) +
        '") b on a.sample_id==b.sid) c where c.project_id=="' + pid +
        '" and c.status="Done" and c.rid="' + rid + '"')
    #
    if os.path.isfile(all_samples_tree_file) and stored_samples == len(sids):
        G = nx.read_gpickle(all_samples_tree_file)
        tree = json_graph.tree_data(G, root='R')
        return ["taxonomy", tree]
    # cache miss: rebuild the merged abundance matrix and tree from every sample
    nodes = {}
    # for each finalized sample, load its abundance rows and its per-sample tree
    #
    for sid in sids:
        samples = x.exe('select * from samples where project_id="' + pid +
                        '" and sample_id="' + sid[0] + '"')
        xpath = x.project(pid)[0][4]
        sample = rootvar.samples(samples[0], xpath)
        rf = rootvar.result_files(pid, analysis, pipeline, sample.id, rid)
        view = rootvar.ViewSampleResults(sample, pipeline, rid, analysis, rf)
        FULL_MATRIX += view.all()
        Gp = nx.read_gpickle(rf.pk)
        edges += Gp.edges()
        for node in Gp.nodes():
            if node not in nodes:
                nodes[node] = Gp.node[node]['level']
    x.close()
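    # persist the merged matrix so the stored_samples check above can hit next time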
    rootvar.full_matrix_sql(all_samples_tree_file + ".db", FULL_MATRIX)
    G = nx.DiGraph()
    for i in edges:
        if i not in G.edges():
            if i[1] not in G.nodes():
                G.add_node(i[1], samples=1, level=nodes[i[1]])
            else:
                G.node[i[1]]['samples'] += 1
            if i[0] not in G.nodes():
                G.add_node(i[0], samples=1, level=nodes[i[0]])
            else:
                G.node[i[0]]['samples'] += 1
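            # attach the edge only if the child has no parent yet, keeping G a
            # tree so json_graph.tree_data can serialize it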
            if not G.predecessors(i[1]):
                G.add_edge(i[0], i[1])
        else:
            G.node[i[0]]['samples'] += 1
            G.node[i[1]]['samples'] += 1
    tree = json_graph.tree_data(G, root='R')
    nx.write_gpickle(G, all_samples_tree_file)
    return ["taxonomy", tree, all_samples_tree_file + ".json"]
Example #13
from app.lib.common import rootvar
from app.lib.common.sqlite3_commands import update_jobs, update_status
from app.lib.create_project import insert_new_project as sql
import time
import os
import base64
import json
from app.lib.common.arc_connect import bench2archu, arcon
from app.lib.email import Email as email
import datetime
database = sql.SQL(rootvar.__FILEDB__)
import re
import logging


def get_results(job='', status='', message=[]):

    log = logging.getLogger()

    try:
        inp = json.loads(base64.b64decode(job))
        data = inp[0]
        refs = inp[1]
        sid = inp[2]
        uid = inp[3]
        pip = inp[4]
        USER = inp[6]
        SAMPLE = inp[7]

        if pip == "assembly":