Code Example #1
File: rest.py Project: DeepInThought/Regovar
    async def get(self, request):
        peername = request.transport.get_extra_info('peername')
        # Fall back to a placeholder id when the peer address is unavailable
        host, port = peername if peername is not None else ("unknown", 0)
        ws_id = "{}:{}".format(host, port)

        ws = web.WebSocketResponse()
        await ws.prepare(request)

        WebsocketHandler.socket_list.append((ws, ws_id))
        msg = {
            'action': 'hello',
            'data': [[str(_ws[1]) for _ws in WebsocketHandler.socket_list]]
        }
        core.notify_all(msg)

        try:
            async for msg in ws:
                if msg.type == aiohttp.WSMsgType.TEXT:
                    if msg.data == 'close':
                        log('CLOSE MESSAGE RECEIVED')
                        await ws.close()
                    else:
                        # Analyse message sent by client and send response if needed
                        data = msg.json()
                        if data['action'] == 'user_info':
                            log('WebsocketHandler {0} '.format(data['action']))
                            pass
                elif msg.type == aiohttp.WSMsgType.ERROR:
                    log('ws connection closed with exception {0}'.format(
                        ws.exception()))
        finally:
            WebsocketHandler.socket_list.remove((ws, ws_id))

        return ws
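
Every example on this page calls core.notify_all to broadcast a message to the connected WebSocket clients, but the broadcast helper itself is not part of the listing, and the examples call it with different keyword names (msg=, data=, or positional), so its signature evolved across versions. Below is a minimal, hypothetical sketch of what such a helper could look like, assuming the (WebSocketResponse, ws_id) tuples kept in WebsocketHandler.socket_list above; the actual Regovar implementation may differ.

import asyncio
import json

def notify_all(msg, socket_list):
    # Hypothetical sketch, not the project's actual implementation.
    # socket_list is assumed to hold (aiohttp.web.WebSocketResponse, ws_id)
    # tuples, as maintained by WebsocketHandler.socket_list in the handler above.
    payload = msg if isinstance(msg, str) else json.dumps(msg)
    for ws, _ws_id in list(socket_list):
        if not ws.closed:
            # send_str() is a coroutine in recent aiohttp releases, so schedule
            # it on the running event loop instead of awaiting it here.
            asyncio.ensure_future(ws.send_str(payload))
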
Code Example #2
    def set_status(self, job, new_status, notify=True, asynch=False):
        from core.core import core
        # Avoid useless notifications
        # It is impossible to change the state of a job that is in error or canceled
        if job.status == new_status or job.status in ["error", "canceled"]:
            return
        # Update status
        job.status = new_status
        job.save()

        # Do we need to do something according to the new status?
        # Nothing to do for the statuses "waiting", "initializing", "running" and "finalizing"
        if job.status in ["pause", "error", "done", "canceled"]:
            next_jobs = Session().query(Job).filter_by(status="waiting").order_by("priority").all()
            if len(next_jobs) > 0:
                if asynch:
                    run_async(self.start, next_jobs[0].id)
                else:
                    self.start(next_jobs[0].id)
        elif job.status == "finalizing":
            if asynch:
                run_async(self.finalize, job.id)
            else:
                self.finalize(job.id)
        # Push notification
        if notify:
            if new_status == "done":
                # Force reload to get generated outputs
                job.init(1, True)
                core.notify_all({"action": "job_updated", "data": job.to_json(["id", "update_date", "status", "progress_value", "progress_label", "logs", "outputs"])})
            else:
                core.notify_all({"action": "job_updated", "data": job.to_json(["id", "update_date", "status", "progress_value", "progress_label", "logs"])})
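
set_status relies on a run_async helper to trigger start() or finalize() without blocking the caller. That helper is not shown in this listing (and Code Example #5 below passes its arguments as a tuple instead), so the following is only a hedged, thread-based sketch of its possible shape.

import threading

def run_async(func, *args):
    # Hypothetical sketch of the helper used above: run the callable in a
    # daemon thread so that the caller returns immediately.
    worker = threading.Thread(target=func, args=args, daemon=True)
    worker.start()
    return worker
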
Code Example #3
File: event_manager.py Project: REGOVAR/REGOVAR
    def log(self, author_id, type, meta, message, details=None):
        """
            Create an event
        """
        from core.core import core
        # Check data
        author_id = author_id if author_id else "NULL"
        if type not in ["custom", "info", "warning", "error", "technical"]:
            type = "technical"
        if type == "warning": war(message)
        elif type == "error": err(message)
        elif type != "custom": log(message)
        meta = self.check_meta_for_sql(meta)
        message = sql_escape(message)
        details = "'" + sql_escape(details) + "'" if details else "NULL"
        # Execute query
        sql = "INSERT INTO event (author_id, type, meta, message, details) VALUES ({0}, '{1}', {2}, '{3}', {4}) RETURNING id;".format(author_id, type, meta, message, details)
        event_id = execute(sql).first()[0]

        # Notify client of new event
        core.notify_all(data={"action": "new_event", "data" : {
            "author_id": author_id, 
            "date": datetime.datetime.now().isoformat(), 
            "message": message,
            "type": type,
            "meta": meta
            }})
        return event_id
Code Example #4
    def log(self, author_id, type, meta, message, details=None):
        """
            Create an event
        """
        from core.core import core
        # Check data
        author_id = author_id if author_id else "NULL"
        if type not in ["custom", "info", "warning", "error", "technical"]:
            type = "technical"
        if type == "warning": war(message)
        elif type == "error": err(message)
        elif type != "custom": log(message)
        meta = self.check_meta_for_sql(meta)
        message = sql_escape(message)
        details = "'" + sql_escape(details) + "'" if details else "NULL"
        # Execute query
        sql = "INSERT INTO event (author_id, type, meta, message, details) VALUES ({0}, '{1}', {2}, '{3}', {4}) RETURNING id;".format(
            author_id, type, meta, message, details)
        event_id = execute(sql).first()[0]

        # Notify client of new event
        core.notify_all(
            data={
                "action": "new_event",
                "data": {
                    "author_id": author_id,
                    "date": datetime.datetime.now().isoformat(),
                    "message": message,
                    "type": type,
                    "meta": meta
                }
            })
        return event_id
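
For context, a hypothetical call to this event logger could look like the snippet below; the core.events attribute name and the argument values are assumptions for illustration, not taken from the project.

from core.core import core

# Hypothetical usage of the logger above (attribute name and values assumed)
event_id = core.events.log(
    author_id=1,
    type="info",
    meta=None,
    message="Analysis created")
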
Code Example #5
File: job_manager.py Project: REGOVAR/Pirus
    def set_status(self, job, new_status, notify=True, asynch=True):
        from core.core import core
        # Avoid useless notification
        # Impossible to change state of a job in error or canceled
        if (new_status != "running" and job.status == new_status) or job.status in  ["error", "canceled"]:
            return
        # Update status
        job.status = new_status
        job.save()

        # Need to do something according to the new status ?
        # Nothing to do for status : "waiting", "initializing", "running", "finalizing"
        if job.status in ["pause", "error", "done", "canceled"]:
            s = session()
            next_jobs = s.query(Job).filter_by(status="waiting").order_by("priority").all()
            if len(next_jobs) > 0:
                if asynch: 
                    run_async(self.start, (next_jobs[0].id, asynch,))
                else:
                    self.start(next_jobs[0].id, asynch)
        elif job.status == "finalizing":
            # if asynch: 
            #     run_async(self.finalize, (job.id, asynch,))
            # else:
            self.finalize(job.id, asynch)
        # Push notification
        if notify:
            core.notify_all(msg={"action": "job_updated", "data" : [job.to_json()]})
Code Example #6
File: handlers.py Project: REGOVAR/regovar-server
    async def get(self, request):
        peername = request.transport.get_extra_info('peername')
        # Fall back to a placeholder id when the peer address is unavailable
        host, port = peername if peername is not None else ("unknown", 0)
        ws_id = "{}:{}".format(host, port)
        ws = web.WebSocketResponse()
        await ws.prepare(request)

        WebsocketHandler.socket_list.append((ws, ws_id))
        msg = {'msg':'hello', 'data': [[str(_ws[1]) for _ws in WebsocketHandler.socket_list]]}
        core.notify_all(msg=msg)

        try:
            async for msg in ws:
                if msg.tp == aiohttp.MsgType.text:
                    if msg.data == 'close':
                        log('CLOSE MESSAGE RECEIVED')
                        await ws.close()
                    else:
                        # Analyse message sent by client and send response if needed
                        data = msg.json()
                        if data['msg'] == 'user_info':
                            log('WebsocketHandler {0} '.format(data['msg']))
                            pass
                elif msg.tp == aiohttp.MsgType.error:
                    log('ws connection closed with exception {0}'.format(ws.exception()))
        finally:
            WebsocketHandler.socket_list.remove((ws, ws_id))

        return ws
Code Example #7
File: analysis_manager.py Project: REGOVAR/REGOVAR
def update_analysis_async(analysis, filter_id, data):
    from core.model import Filter
    total_results = core.filters.update_wt(analysis, "filter_{}".format(filter_id), data["filter"])
    filter = Filter.from_id(filter_id)
    filter.total_variants = execute("SELECT COUNT(DISTINCT variant_id) FROM wt_{} WHERE filter_{}".format(analysis.id, filter_id)).first()[0]
    filter.total_results = total_results
    filter.progress = 1
    filter.save()
    core.notify_all(data={'action': 'filter_update', 'data': filter.to_json()})
Code Example #8
File: file_handler.py Project: DeepInThought/Regovar
    def complete(self, checksum=None, checksum_type="md5"):
        try:
            log('Upload of the file (id={0}) is complete.'.format(self.id))
            core.files.upload_finish(self.id, checksum, checksum_type)
            f = File.from_id(self.id)
            core.notify_all(data={
                "action": "file_upload",
                "data": f.to_json()
            })
        except Exception as ex:
            return TusManager.build_response(
                code=500, body="Unexpected error occurred: {}".format(ex))
Code Example #9
def update_analysis_async(analysis, filter_id, data):
    from core.model import Filter
    total_results = core.filters.update_wt(
        analysis, "filter_{}".format(filter_id), data["filter"])
    filter = Filter.from_id(filter_id)
    filter.total_variants = execute(
        "SELECT COUNT(DISTINCT variant_id) FROM wt_{} WHERE filter_{}".format(
            analysis.id, filter_id)).first()[0]
    filter.total_results = total_results
    filter.progress = 1
    filter.save()
    core.notify_all(data={
        'action': 'filter_update',
        'data': filter.to_json()
    })
Code Example #10
File: file_handler.py Project: DeepInThought/Regovar
    def save(self):
        from core.core import core
        try:
            f = File.from_id(self.id)
            f.upload_offset = self.upload_offset
            f.save()
            core.notify_all(data={
                "action": "file_upload",
                "data": f.to_json(["id", "size", "upload_offset", "status"])
            })
        except Exception as ex:
            return TusManager.build_response(
                code=500, body="Unexpected error occurred: {}".format(ex))
Code Example #11
File: job_handler.py Project: DeepInThought/Regovar
    async def update_status(self, request):
        # 1- Retrieve data from request
        data = await request.json()
        job_id = request.match_info.get('job_id', -1)
        try:
            job = Job.from_id(job_id)
            new_status = data.pop("status") if "status" in data else None
            job.load(data)
            if new_status:
                print("JOB STATUS CHANGE: " + new_status)
                core.jobs.set_status(job, new_status)
            else:
                core.notify_all({"action": "job_updated", "data" : job.to_json(["id", "update_date", "status", "progress_value", "progress_label", "logs"])})
            
        except Exception as ex:
            return rest_error("Unable to update information for the job with id {}. {}".format(job_id, ex))

        return rest_success(check_local_path(job.to_json()))
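
update_status wraps its results in rest_success/rest_error helpers that are not included in this listing. A minimal sketch, assuming they simply wrap the payload in a JSON envelope with aiohttp (the envelope keys are assumptions), might look like this:

from aiohttp import web

def rest_success(data=None):
    # Hypothetical sketch: wrap a successful payload in a JSON response.
    return web.json_response({"success": True, "data": data})

def rest_error(message, code=500):
    # Hypothetical sketch: wrap an error message in a JSON error response.
    return web.json_response({"success": False, "msg": message}, status=code)
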
Code Example #12
File: handlers.py Project: REGOVAR/Pirus
    async def get(self, request):
        peername = request.transport.get_extra_info('peername')
        # Fall back to a placeholder id when the peer address is unavailable
        host, port = peername if peername is not None else ("unknown", 0)
        ws_id = "{}:{}".format(host, port)
        ws = web.WebSocketResponse()
        await ws.prepare(request)

        print('WS connection open by', ws_id)
        WebsocketHandler.socket_list.append((ws, ws_id))
        msg = '{"action":"online_user", "data" : [' + ','.join(
            ['"' + _ws[1] + '"'
             for _ws in WebsocketHandler.socket_list]) + ']}'
        core.notify_all(msg=msg)

        try:
            async for msg in ws:
                if msg.tp == aiohttp.MsgType.text:
                    if msg.data == 'close':
                        print('CLOSE MESSAGE RECEIVED')
                        await ws.close()
                    else:
                        # Analyse message sent by client and send response if needed
                        data = msg.json()
                        if data["action"] == "user_info":
                            print("WebsocketHandler", data["action"])
                            pass
                elif msg.tp == aiohttp.MsgType.error:
                    print('ws connection closed with exception %s' % ws.exception())
        finally:
            print('WS connection closed for', ws_id)
            WebsocketHandler.socket_list.remove((ws, ws_id))

        return ws
Code Example #13
File: vcf_manager.py Project: DeepInThought/Regovar
    async def import_data(self, file_id, **kargs):
        """
            Import samples, variants and annotations from the provided file.
            This method check provided parameters and parse the header of the vcf to get samples and compute the number of line
            that need to be parse to allow us to compute a progress indicator. The parsing is done in delegate called in another thread.
            Return the list of sample that have been added.
        """
        from core.core import core
        file = Model.File.from_id(file_id)
        filepath = file.path
        reference_id = kargs["reference_id"]
        start_0 = datetime.datetime.now()
        job_in_progress = []

        vcf_metadata = prepare_vcf_parsing(reference_id, filepath)
        db_ref_suffix = "_" + Model.execute(
            "SELECT table_suffix FROM reference WHERE id={}".format(
                reference_id)).first().table_suffix

        if filepath.endswith(".vcf") or filepath.endswith(".vcf.gz"):
            filepath += ".regovar_import"  # a tmp file has been created by the prepare_vcf_parsing() method to work around a file format unsupported by pysam.
            start = datetime.datetime.now()

            # Create vcf parser
            vcf_reader = VariantFile(filepath)

            # get samples in the VCF
            # samples = {i : Model.get_or_create(Model.Session(), Model.Sample, name=i)[0] for i in list((vcf_reader.header.samples))}
            samples = {}
            for i in list((vcf_reader.header.samples)):
                sample = Model.Sample.new()
                sample.name = i
                sample.file_id = file_id
                sample.reference_id = reference_id
                sample.filter_description = {
                    filter[0]: filter[1].description
                    for filter in vcf_reader.header.filters.items()
                }
                sample.default_dbuid = []
                sample.status = "loading"
                for dbname in vcf_metadata["annotations"].keys():
                    if vcf_metadata["annotations"][dbname]:
                        sample.default_dbuid.append(
                            vcf_metadata["annotations"][dbname].db_uid)
                # TODO : is_mosaic according to the data in the vcf
                sample.save()

                # As these samples will be shared with other threads, we remove them from the SQL session to avoid errors
                samples.update({i: sample.to_json()})

            if len(samples.keys()) == 0:
                war("VCF files without samples cannot be imported in the database.")
                core.notify_all({
                    "action": "import_vcf_error",
                    "data": {
                        "reference_id": reference_id,
                        "file_id": file_id,
                        "msg": "VCF files without samples cannot be imported in the database."
                    }
                })
                return

            # tasks queue shared by all threads
            self.queue = Queue(maxsize=0)
            # list of workers created to execute multithreaded tasks
            self.workers = []

            # init threading workers
            for i in range(VCF_IMPORT_MAX_THREAD):
                t = Thread(target=vcf_import_worker,
                           args=(self.queue, file_id, samples),
                           daemon=True)
                t.start()
                self.workers.append(t)

            core.notify_all({
                "action": "import_vcf_start",
                "data": {
                    "reference_id": reference_id,
                    "file_id": file_id,
                    "samples": [{"id": samples[sid]["id"], "name": samples[sid]["name"]} for sid in samples.keys()]
                }
            })
            records_count = vcf_metadata["count"]
            log("Importing file {0}\n\r\trecords  : {1}\n\r\tsamples  :  ({2}) {3}\n\r\tstart    : {4}"
                .format(filepath, records_count, len(samples.keys()),
                        reprlib.repr([sid for sid in samples.keys()]), start))

            run_async(self.import_delegate, file_id, vcf_reader, reference_id,
                      db_ref_suffix, vcf_metadata, samples)

            return {
                "success": True,
                "samples": samples,
                "records_count": records_count
            }
        return {"success": False, "error": "File not supported"}
Code Example #14
File: vcf_manager.py Project: DeepInThought/Regovar
    def import_delegate(self, file_id, vcf_reader, reference_id, db_ref_suffix,
                        vcf_metadata, samples):
        """
            This delegate will do the "real" import.
            It will be called by the "import_data" method in a new thread in order to don't block the main thread
        """
        from core.core import core
        # parsing vcf file
        records_count = vcf_metadata['count']
        records_current = 0
        vcf_line = vcf_metadata['header_count']
        table = "variant" + db_ref_suffix

        sql_pattern1 = "INSERT INTO {0} (chr, pos, ref, alt, is_transition, bin, sample_list) VALUES ({1}, {2}, '{3}', '{4}', {5}, {6}, array[{7}]) ON CONFLICT (chr, pos, ref, alt) DO UPDATE SET sample_list=array_intersect({0}.sample_list, array[{7}])  WHERE {0}.chr={1} AND {0}.pos={2} AND {0}.ref='{3}' AND {0}.alt='{4}';"
        sql_pattern2 = "INSERT INTO sample_variant" + db_ref_suffix + " (sample_id, variant_id, vcf_line, bin, chr, pos, ref, alt, genotype, depth, depth_alt, quality, filter) SELECT {0}, id, {1}, {2}, '{3}', {4}, '{5}', '{6}', {7}, {8}, {9}, {10}, '{11}' FROM variant" + db_ref_suffix + " WHERE bin={2} AND chr={3} AND pos={4} AND ref='{5}' AND alt='{6}' ON CONFLICT (sample_id, variant_id) DO NOTHING;"

        sql_annot_trx = "INSERT INTO {0} (variant_id, bin,chr,pos,ref,alt, regovar_trx_id, {1}) SELECT id, {3},{4},{5},'{6}','{7}', '{8}', {2} FROM variant" + db_ref_suffix + " WHERE bin={3} AND chr={4} AND pos={5} AND ref='{6}' AND alt='{7}' ON CONFLICT (variant_id, regovar_trx_id) DO  NOTHING; "  # TODO : do update on conflict
        sql_annot_var = "INSERT INTO {0} (variant_id, bin,chr,pos,ref,alt, {1}) SELECT id, {3},{4},{5},'{6}','{7}', {2} FROM variant" + db_ref_suffix + " WHERE bin={3} AND chr={4} AND pos={5} AND ref='{6}' AND alt='{7}' ON CONFLICT (variant_id) DO  NOTHING;"

        sql_query1 = ""
        sql_query2 = ""
        sql_query3 = ""
        count = 0

        for row in vcf_reader:
            records_current += 1
            vcf_line += 1
            #log("> {} : {}".format(records_current, count))
            #if records_current == 14356:
            #ipdb.set_trace()

            # TODO : update sample's progress indicator

            chrm = normalize_chr(str(row.chrom))

            for allele in row.alleles:
                pos, ref, alt = normalise(row.pos, row.ref, allele)
                bin = getMaxUcscBin(pos, pos + len(ref))

                # get list of sample that have this variant (chr-pos-ref-alt)
                samples_array = []
                for sn in row.samples:
                    sp = row.samples.get(sn)
                    if allele in sp.alleles:
                        samples_array.append(samples[sp.name]["id"])
                if len(samples_array) == 0: continue
                # save variant
                samples_array = ",".join([str(s) for s in samples_array])
                sql_query1 += sql_pattern1.format(table, chrm, pos, ref, alt,
                                                  is_transition(ref, alt), bin,
                                                  samples_array)

                # Register variant/sample associations
                for sn in row.samples:
                    sp = row.samples.get(sn)
                    gt = normalize_gt(sp)
                    filters = escape_value_for_sql(
                        json.dumps(row.filter.keys()))
                    count += 1
                    if allele in sp.alleles:
                        if "AD" in sp.keys():
                            # Get allelic depth if exists (AD field)
                            depth_alt = sp["AD"][sp.alleles.index(allele)]
                        elif "DP4" in sp.keys():
                            if gt == 0:
                                depth_alt = sum(sp["DP4"])
                            else:
                                depth_alt = sp["DP4"][2] + sp["DP4"][
                                    3] if alt != ref else sp["DP4"][0] + sp[
                                        "DP4"][1]
                        else:
                            depth_alt = "NULL"

                        sql_query2 += sql_pattern2.format(
                            samples[sn]["id"],
                            vcf_line, bin, chrm, pos, ref, alt, gt,
                            get_info(sp, "DP"), depth_alt, row.qual, filters)
                    else:
                        # save that the sample HAVE NOT this variant
                        sql_query2 += sql_pattern2.format(
                            samples[sn]["id"],
                            vcf_line, bin, chrm, pos, ref, alt, "NULL",
                            get_info(sp, "DP"), "NULL", row.qual, filters)

                # Register variant annotations
                for ann_name, importer in vcf_metadata["annotations"].items():
                    if importer:
                        importer_query, importer_count = importer.import_annotations(
                            sql_annot_trx, bin, chrm, pos, ref, alt, row.info)
                        sql_query3 += importer_query
                        count += importer_count

            # split big requests to avoid SQL out-of-memory transactions or freezing the server for too long
            if count >= 5000:
                progress = records_current / records_count
                count = 0
                transaction = sql_query1 + sql_query2 + sql_query3
                log("VCF import : line {} (chrm {})".format(
                    records_current, chrm))
                log("VCF import : Execute sync query {}/{} ({}%)".format(
                    records_current, records_count, round(progress * 100, 2)))

                # update samples' progress indicator
                # note : as we are updating a lot of data in the database with several async threads,
                #        we update the data with a "manual query" to avoid conflicts with the session
                sps = []
                sql = "UPDATE sample SET loading_progress={} WHERE id IN ({})".format(
                    progress,
                    ",".join([str(samples[sid]["id"]) for sid in samples]))
                Model.execute(sql)
                core.notify_all({
                    "action": "import_vcf_processing",
                    "data": {
                        "reference_id": reference_id,
                        "file_id": file_id,
                        "status": "loading",
                        "progress": progress,
                        "samples": [{"id": samples[sname]["id"], "name": sname} for sname in samples]
                    }
                })

                log("VCF import : enqueue query")
                self.queue.put(transaction)
                # Reset query buffers
                sql_query1 = ""
                sql_query2 = ""
                sql_query3 = ""

        # Loop done, execute last pending query
        log("VCF import : Execute last async query")
        transaction = sql_query1 + sql_query2 + sql_query3
        if transaction:
            self.queue.put(transaction)

        # Wait until all queries in the queue have been executed
        log("VCF parsing done. Waiting for async execution of sql queries")

        # block until all tasks are done
        self.queue.join()
        log("No more sql query to proceed")

        # stop vcf_import_thread_workers
        for i in range(VCF_IMPORT_MAX_THREAD):
            self.queue.put(None)
        for t in self.workers:
            t.join()

        # Compute composite variant by sample
        sql_pattern = "UPDATE sample_variant" + db_ref_suffix + " u SET is_composite=TRUE WHERE u.sample_id = {0} AND u.variant_id IN (SELECT DISTINCT UNNEST(sub.vids) as variant_id FROM (SELECT array_agg(v.variant_id) as vids, g.name2 FROM sample_variant" + db_ref_suffix + " v INNER JOIN refgene" + db_ref_suffix + " g ON g.chr=v.chr AND g.trxrange @> v.pos WHERE v.sample_id={0} AND v.genotype=2 or v.genotype=3 GROUP BY name2 HAVING count(*) > 1) AS sub)"
        log("Computing is_composite fields by samples :")
        for sid in samples:
            query = sql_pattern.format(samples[sid]["id"])
            log(" - sample {}".format(samples[sid]["id"]))
            Model.execute(query)
        log("Sample import from VCF Done")
        end = datetime.datetime.now()

        # update sample's progress indicator
        Model.execute(
            "UPDATE sample SET status='ready', loading_progress=1  WHERE id IN ({})"
            .format(",".join([str(samples[sid]["id"]) for sid in samples])))

        core.notify_all({
            "action": "import_vcf_end",
            "data": {
                "reference_id": reference_id,
                "file_id": file_id,
                "msg": "Import done without error.",
                "samples": [{"id": samples[s]["id"], "name": samples[s]["name"]} for s in samples.keys()]
            }
        })

        # When the import is done, check if analyses are waiting for creation, and start the wt creation if all samples are ready
        # TODO
        sql = "SELECT DISTINCT(analysis_id) FROM analysis_sample WHERE sample_id IN ({})".format(
            ",".join([str(samples[sid]["id"]) for sid in samples]))
        for row in Model.execute(sql):
            analysis = Model.Analysis.from_id(row.analysis_id, 1)
            if analysis.status == "waiting":
                log("Auto initialisation of the analysis in witing state : {} ({})"
                    .format(analysis.name, analysis.id))
                core.filters.request(analysis.id, analysis.filter,
                                     analysis.fields)
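
Several helpers used by import_delegate (normalise, normalize_chr, getMaxUcscBin, is_transition, ...) are defined elsewhere in the project. As an illustration of the simplest one, here is a hedged sketch of what an is_transition(ref, alt) check typically does; the actual Regovar helper may handle more cases.

def is_transition(ref, alt):
    # Hypothetical sketch: a single-nucleotide substitution is a transition
    # when both alleles are purines (A<->G) or both pyrimidines (C<->T);
    # anything else (transversions, indels) returns False here.
    purines = {"A", "G"}
    pyrimidines = {"C", "T"}
    if len(ref) == 1 and len(alt) == 1:
        return (ref in purines and alt in purines) or (ref in pyrimidines and alt in pyrimidines)
    return False
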
Code Example #15
    def update_working_table(self, analysis_id, sample_ids, field_uids, dbs_uids, filter_ids=[], attributes={}):
        """
            Update annotation of the working table of an analysis. The working table shall already exists
        """
        from core.core import core
        # Get list of fields to add in the wt
        analysis = Analysis.from_id(analysis_id)
        total = analysis.total_variants
        diff_fields = []
        diff_dbs = []
        progress = {"msg": "wt_processing", "start": datetime.datetime.now().ctime(), "analysis_id": analysis_id, "step": 2, "progress_total": total, "progress_current": 0}
        core.notify_all(progress)
        try:
            query = "SELECT column_name FROM information_schema.columns WHERE table_name='wt_{}'".format(analysis_id)
            current_fields = [row.column_name if row.column_name[0] != '_' else row.column_name[1:] for row in execute(query)]
            current_dbs = []
            for f_uid in current_fields:
                if f_uid in self.fields_map and self.fields_map[f_uid]['db_uid'] not in current_dbs:
                    current_dbs.append(self.fields_map[f_uid]['db_uid'])
            for f_uid in field_uids:
                if f_uid not in current_fields and self.fields_map[f_uid]['db_name_ui'] != 'Variant':
                    diff_fields.append('_{}'.format(f_uid))
                    if self.fields_map[f_uid]['db_uid'] not in diff_dbs and self.fields_map[f_uid]['db_uid'] not in current_dbs:
                        diff_dbs.append(self.fields_map[f_uid]['db_uid'])
        except:
            # working table doesn't exist
            return False

        # Alter working table to add new fields
        pattern = "ALTER TABLE wt_{0} ADD COLUMN {1}{2} {3};"
        query = ""
        update_queries = []
        for f_uid in diff_fields:
            if f_uid[0] == '_':
                f_uid = f_uid[1:]
            query += pattern.format(analysis_id, '_', f_uid, self.sql_type_map[self.fields_map[f_uid]['type']])
        for a_name in attributes.keys():
            att_checked = []
            for sid, att in attributes[a_name].items():
                if 'attr_{}_{}'.format(a_name.lower(), att.lower()) in current_fields:
                    # We consider that if the first key_value for the attribute is defined, the whole attribute's columns are defined,
                    # so break and switch to the next attribute.
                    # That's why, before updating an attribute-value, we first need to drop all its former columns in the wt.
                    break
                else:
                    if att not in att_checked:
                        att_checked.append(att)
                        query += pattern.format(analysis_id, 'attr_', "{}_{}".format(a_name.lower(), att.lower()), 'boolean DEFAULT False')
                        update_queries.append("UPDATE wt_{} SET attr_{}_{}=True WHERE s{}_gt IS NOT NULL; ".format(analysis_id, a_name.lower(), att.lower(), sid))
        for f_id in filter_ids:
            if 'filter_{}'.format(f_id) not in current_fields:
                query += pattern.format(analysis_id, 'filter_', f_id, 'boolean DEFAULT False')
                f_filter = json.loads(execute("SELECT filter FROM filter WHERE id={}".format(f_id)).first().filter)
                q = self.build_query(analysis_id, analysis.reference_id, 'table', f_filter, [], None, None)
                queries = q[0]
                if len(queries) > 0:
                    # add all query to create temps tables needed by the filter if they do not yet exists
                    for q in queries[:-1]:
                        query += q
                    # add the query to update wt with the filter
                    # Note : As transcript_pk_field_uid and transcript_pk_field_value may be null, we cannot use '=' operator and must use 'IS NOT DISTINCT FROM' 
                    #        as two expressions that return 'null' are not considered as equal in SQL.
                    update_queries.append("UPDATE wt_{0} SET filter_{1}=True FROM ({2}) AS _sub WHERE wt_{0}.variant_id=_sub.variant_id AND wt_{0}.transcript_pk_field_uid IS NOT DISTINCT FROM _sub.transcript_pk_field_uid AND wt_{0}.transcript_pk_value IS NOT DISTINCT FROM _sub.transcript_pk_value ; ".format(analysis_id, f_id, queries[-1].strip()[:-1]))
        if query != "":
            # Add new annotation columns to the working table
            execute(query)
        progress.update({"step": 3})
        core.notify_all(progress)

        # Loop over the new annotation databases: if a database is new, we need to add its transcripts to the working table
        fields_to_copy_from_variant = ["variant_id","bin","chr","pos","ref","alt","is_transition","sample_tlist","sample_tcount","sample_alist","sample_acount","depth"]
        fields_to_copy_from_variant.extend(['s{}_gt'.format(s) for s in sample_ids])
        fields_to_copy_from_variant.extend(['s{}_dp'.format(s) for s in sample_ids])
        fields_to_copy_from_variant.extend(['attr_{}'.format(a.lower()) for a in attributes.keys()])
        fields_to_copy_from_variant.extend(['filter_{}'.format(f) for f in filter_ids])
        pattern = "INSERT INTO wt_{0} (annotated, transcript_pk_field_uid, transcript_pk_value, {1}) \
        SELECT False, '{2}', {4}.transcript_id, {3} \
        FROM (SELECT {1} FROM wt_{0} WHERE transcript_pk_field_uid IS NULL) AS _var \
        INNER JOIN {4} ON _var.variant_id={4}.variant_id" # TODO : check if more optim to select with JOIN ON bin/chr/pos/ref/alt
        for uid in diff_dbs:
            if self.db_map[uid]["type"] == "transcript":
                query = pattern.format(analysis_id,
                                       ', '.join(fields_to_copy_from_variant),
                                       self.db_map[uid]["db_pk_field_uid"],
                                       ', '.join(["_var.{}".format(f) for f in fields_to_copy_from_variant]),
                                       self.db_map[uid]["name"])
                execute(query)
        progress.update({"step": 4})
        core.notify_all(progress)

        # Create update query to retrieve annotation
        UPDATE_LOOP_RANGE = 1000
        to_update = {}
        for f_uid in diff_fields:
            if self.fields_map[f_uid[1:]]['db_uid'] not in to_update.keys():
                to_update[self.fields_map[f_uid[1:]]['db_uid']] = []
            to_update[self.fields_map[f_uid[1:]]['db_uid']].append({
                "name": self.fields_map[f_uid[1:]]['name'], 
                "uid":f_uid[1:], 
                "db_name": self.fields_map[f_uid[1:]]['db_name']})
        # Loop to update working table annotation (queries "packed" fields requested by annotation's database)
        for db_uid in to_update.keys():
            if self.db_map[db_uid]["type"] == "transcript":
                qset_ann = ', '.join(['_{0}=_ann._{0}'.format(f["uid"]) for f in to_update[db_uid]])
                qslt_ann = ','.join(['{0}.{1} AS _{2}'.format(f['db_name'], f["name"], f["uid"]) for f in to_update[db_uid]])
                qslt_var = "SELECT variant_id, bin, chr, pos, ref, alt, transcript_pk_value FROM wt_{0} WHERE annotated=False AND transcript_pk_field_uid='{1}' LIMIT {2}".format(analysis_id, self.db_map[self.fields_map[f_uid[1:]]['db_uid']]['db_pk_field_uid'], UPDATE_LOOP_RANGE)
                qjoin = 'LEFT JOIN {0} '.format(self.db_map[db_uid]['join'].format('_var'))
                query = "UPDATE wt_{0} SET annotated=True, {1} FROM (SELECT _var.variant_id, _var.transcript_pk_value, {2} FROM ({3}) AS _var {4}) AS _ann \
                    WHERE wt_{0}.variant_id=_ann.variant_id AND wt_{0}.transcript_pk_field_uid='{5}' AND wt_{0}.transcript_pk_value=_ann.transcript_pk_value".format(
                    analysis_id, 
                    qset_ann, 
                    qslt_ann, 
                    qslt_var, 
                    qjoin,
                    self.db_map[self.fields_map[f_uid[1:]]['db_uid']]['db_pk_field_uid'])
            else:
                qset_ann = ', '.join(['{0}=_ann._{0}'.format(f_uid) for f_uid in diff_fields])
                qslt_ann = ','.join(['{0}.{1} AS _{2}'.format(self.fields_map[f_uid[1:]]['db_name'], self.fields_map[f_uid[1:]]['name'], f_uid) for f_uid in diff_fields])
                qslt_var = 'SELECT variant_id, bin, chr, pos, ref, alt FROM wt_{0} WHERE annotated=False AND transcript_pk_field_uid IS NULL LIMIT {1}'.format(analysis_id, UPDATE_LOOP_RANGE)
                qjoin = ' '.join(['LEFT JOIN {0} '.format(self.db_map[db_uid]['join'].format('_var'), self.db_map[db_uid]) for db_uid in diff_dbs])
                query = "UPDATE wt_{0} SET annotated=True, {1} FROM (SELECT _var.variant_id, {2} FROM ({3}) AS _var {4}) AS _ann WHERE wt_{0}.variant_id=_ann.variant_id".format(analysis_id, qset_ann, qslt_ann, qslt_var, qjoin)

            if qset_ann != "":
                # Mark all variant as not annotated (to be able to do a "resumable update")
                execute("UPDATE wt_{} SET annotated=False".format(analysis_id))
                for page in range(0, total, UPDATE_LOOP_RANGE):
                    execute(query)
                    progress.update({"progress_current": page})
                    core.notify_all(progress)
            progress.update({"step": 5, "progress_current": total})
            core.notify_all(progress)

        # Apply queries to update attributes and filters columns in the wt
        if len(update_queries) > 0:
            execute("".join(update_queries))
        progress.update({"step": 6})
        core.notify_all(progress)

        # Update the status of the analysis
        query = "UPDATE analysis SET status='READY' WHERE id={}".format(analysis_id)
        execute(query)
Code Example #16
File: vcf_manager.py Project: REGOVAR/REGOVAR
    def import_delegate(self, file_id, vcf_reader, reference_id, db_ref_suffix, vcf_metadata, samples):
        """
            This delegate will do the "real" import.
            It will be called by the "import_data" method in a new thread in order to don't block the main thread
        """
        from core.core import core
        # parsing vcf file
        records_count = vcf_metadata['count']
        records_current = 0
        vcf_line = vcf_metadata['header_count']
        table = "variant" + db_ref_suffix
        
        sql_pattern1 = "INSERT INTO {0} (chr, pos, ref, alt, is_transition, bin, sample_list) VALUES ({1}, {2}, '{3}', '{4}', {5}, {6}, array[{7}]) ON CONFLICT (chr, pos, ref, alt) DO UPDATE SET sample_list=array_intersect({0}.sample_list, array[{7}])  WHERE {0}.chr={1} AND {0}.pos={2} AND {0}.ref='{3}' AND {0}.alt='{4}';"
        sql_pattern2 = "INSERT INTO sample_variant" + db_ref_suffix + " (sample_id, variant_id, vcf_line, bin, chr, pos, ref, alt, genotype, depth, depth_alt, quality, filter) SELECT {0}, id, {1}, {2}, '{3}', {4}, '{5}', '{6}', {7}, {8}, {9}, {10}, '{11}' FROM variant" + db_ref_suffix + " WHERE bin={2} AND chr={3} AND pos={4} AND ref='{5}' AND alt='{6}' ON CONFLICT (sample_id, variant_id) DO NOTHING;"
        
        sql_annot_trx = "INSERT INTO {0} (variant_id, bin,chr,pos,ref,alt, regovar_trx_id, {1}) SELECT id, {3},{4},{5},'{6}','{7}', '{8}', {2} FROM variant" + db_ref_suffix + " WHERE bin={3} AND chr={4} AND pos={5} AND ref='{6}' AND alt='{7}' ON CONFLICT (variant_id, regovar_trx_id) DO  NOTHING; " # TODO : do update on conflict
        sql_annot_var = "INSERT INTO {0} (variant_id, bin,chr,pos,ref,alt, {1}) SELECT id, {3},{4},{5},'{6}','{7}', {2} FROM variant" + db_ref_suffix + " WHERE bin={3} AND chr={4} AND pos={5} AND ref='{6}' AND alt='{7}' ON CONFLICT (variant_id) DO  NOTHING;"

        sql_query1 = ""
        sql_query2 = ""
        sql_query3 = ""
        count = 0
        
        for row in vcf_reader: 
            records_current += 1 
            vcf_line += 1
            #log("> {} : {}".format(records_current, count))
            #if records_current == 14356:
                #ipdb.set_trace()
                    
            # TODO : update sample's progress indicator
            
            
            chrm = normalize_chr(str(row.chrom))
            
            for allele in row.alleles:
                pos, ref, alt = normalise(row.pos, row.ref, allele)
                bin = getMaxUcscBin(pos, pos + len(ref))
                
                # get list of sample that have this variant (chr-pos-ref-alt)
                samples_array = []
                for sn, sp in row.samples.items():
                    if allele in sp.alleles:
                        samples_array.append(samples[sp.name]["id"])
                if len(samples_array) == 0: continue
                # save variant
                samples_array = ",".join([str(s) for s in samples_array])
                sql_query1 += sql_pattern1.format(table, chrm, pos, ref, alt, is_transition(ref, alt), bin, samples_array)
                        
                # Register variant/sample associations
                for sn, sp in row.samples.items():
                    gt = normalize_gt(sp)
                    filters = escape_value_for_sql(json.dumps(row.filter.keys()))
                    count += 1
                    if allele in sp.alleles:
                        if "AD" in sp.keys():
                            # Get allelic depth if exists (AD field)
                            depth_alt = sp["AD"][sp.alleles.index(allele)] 
                        elif "DP4" in sp.keys():
                            if gt == 0:
                                depth_alt = sum(sp["DP4"])
                            else:
                                depth_alt = sp["DP4"][2] + sp["DP4"][3] if alt != ref else sp["DP4"][0] + sp["DP4"][1]
                        else :
                            depth_alt = "NULL"
                        
                        sql_query2 += sql_pattern2.format(samples[sn]["id"], vcf_line, bin, chrm, pos, ref, alt, gt, get_info(sp, "DP"), sqlc(depth_alt), sqlc(row.qual), filters)
                    else:
                        # save that the sample HAVE NOT this variant
                        sql_query2 += sql_pattern2.format(samples[sn]["id"], vcf_line, bin, chrm, pos, ref, alt, "NULL", get_info(sp, "DP"), "NULL", sqlc(row.qual), filters)
                
                # Register variant annotations
                for ann_name, importer in vcf_metadata["annotations"].items():
                    if importer:
                        importer_query, importer_count = importer.import_annotations(sql_annot_trx, bin, chrm, pos, ref, alt, row.info)
                        sql_query3 += importer_query
                        count += importer_count
                        
                            


            # split big requests to avoid SQL out-of-memory transactions or freezing the server for too long
            if count >= 1000:
                progress = records_current / records_count
                count = 0
                transaction = "BEGIN; " + sql_query1 + sql_query2 + sql_query3 + "COMMIT; "
                log("VCF import : line {} (chrm {})".format(records_current, chrm))
                log("VCF import : Execute sync query {}/{} ({}%)".format(records_current, records_count, round(progress * 100, 2)))
                
                    
                # update samples' progress indicator
                # note : as we are updating a lot of data in the database with several async threads,
                #        we update the data with a "manual query" to avoid conflicts with the session
                sps = []
                sql = "UPDATE sample SET loading_progress={} WHERE id IN ({})".format(progress, ",".join([str(samples[sid]["id"]) for sid in samples]))
                Model.execute(sql)
                core.notify_all({"action": "import_vcf_processing", "data" : {"reference_id": reference_id, "file_id" : file_id, "status" : "loading", "progress": progress, "samples": [ {"id" : samples[sname]["id"], "name" : sname} for sname in samples]}})
                
                #log("VCF import : enqueue query")
                #self.queue.put(transaction)
                log("VCF import : execute query")
                Model.execute(transaction)
                # Reset query buffers
                sql_query1 = ""
                sql_query2 = ""
                sql_query3 = ""

        # # Loop done, execute last pending query 
        # log("VCF import : Execute last async query")
        # transaction = sql_query1 + sql_query2 + sql_query3
        # if transaction:
        #     self.queue.put(transaction)


        # # Waiting that all query in the queue was executed
        # log("VCF parsing done. Waiting for async execution of sql queries")
        
        # # block until all tasks are done
        # self.queue.join()
        # log("No more sql query to proceed")
        
        # # stop vcf_import_thread_workers
        # for i in range(VCF_IMPORT_MAX_THREAD):
        #     self.queue.put(None)
        # for t in self.workers:
        #     t.join()

        # Compute composite variant by sample
        sql_pattern = "UPDATE sample_variant" + db_ref_suffix + " u SET is_composite=TRUE WHERE u.sample_id = {0} AND u.variant_id IN (SELECT DISTINCT UNNEST(sub.vids) as variant_id FROM (SELECT array_agg(v.variant_id) as vids, g.name2 FROM sample_variant" + db_ref_suffix + " v INNER JOIN refgene" + db_ref_suffix + " g ON g.chr=v.chr AND g.trxrange @> v.pos WHERE v.sample_id={0} AND v.genotype=2 or v.genotype=3 GROUP BY name2 HAVING count(*) > 1) AS sub)"
        log("Computing is_composite fields by samples :")
        # for sid in samples:
        #     query = sql_pattern.format(samples[sid]["id"])
        #     log(" - sample {}".format(samples[sid]["id"]))
        #     Model.execute(query)
        log("Sample import from VCF Done")
        end = datetime.datetime.now()
        
        # update sample's progress indicator
        Model.execute("UPDATE sample SET status='ready', loading_progress=1  WHERE id IN ({})".format(",".join([str(samples[sid]["id"]) for sid in samples])))
        
        core.notify_all({"action": "import_vcf_end", "data" : {"reference_id": reference_id, "file_id" : file_id, "msg" : "Import done without error.", "samples": [ {"id" : samples[s]["id"], "name" : samples[s]["name"]} for s in samples.keys()]}})


        # When the import is done, check if analyses are waiting for creation, and start the wt creation if all samples are ready
        # TODO
        sql = "SELECT DISTINCT(analysis_id) FROM analysis_sample WHERE sample_id IN ({})".format(",".join([str(samples[sid]["id"]) for sid in samples]))
        for row in Model.execute(sql):
            analysis = Model.Analysis.from_id(row.analysis_id,1)
            if analysis.status == "waiting":
                log("Auto initialisation of the analysis in witing state : {} ({})".format(analysis.name, analysis.id))
                core.filters.request(analysis.id, analysis.filter, analysis.fields)
Code Example #17
    def create_working_table(self, analysis_id, sample_ids, field_uids, dbs_uids, filter_ids=[], attributes={}):
        """
            Create a working sql table for the analysis to improove speed of filtering/annotation.
            A Working table contains all variants used by the analysis, with all annotations used by filters or displayed
        """
        from core.core import core
        if len(sample_ids) == 0: raise RegovarException("No sample... so not able to retrieve data")

        db_ref_suffix= "hg19"  # execute("SELECT table_suffix FROM reference WHERE id={}".format(reference_id)).first().table_suffix
        progress = {"msg": "wt_processing", "start": datetime.datetime.now().ctime(), "analysis_id": analysis_id, "step": 1}
        core.notify_all(progress)
        # Create schema
        w_table = 'wt_{}'.format(analysis_id)
        query = "DROP TABLE IF EXISTS {0} CASCADE; CREATE TABLE {0} (\
            is_variant boolean DEFAULT False, \
            annotated boolean DEFAULT False, \
            variant_id bigint, \
            bin integer, \
            chr bigint, \
            pos integer, \
            ref text, \
            alt text,\
            transcript_pk_field_uid character varying(32), \
            transcript_pk_value character varying(100), \
            is_transition boolean, \
            sample_tlist integer[], \
            sample_tcount integer, \
            sample_alist integer[], \
            sample_acount integer, \
            depth integer, "
        query += ", ".join(["s{}_gt integer".format(i) for i in sample_ids]) + ", "
        query += ", ".join(["s{}_dp integer".format(i) for i in sample_ids]) 
        query += ", CONSTRAINT {0}_ukey UNIQUE (variant_id, transcript_pk_field_uid, transcript_pk_value));"
        execute(query.format(w_table))
        # Insert variant without annotation first
        query =  "INSERT INTO {0} (variant_id, bin, chr, pos, ref, alt, is_transition, sample_tlist) \
            SELECT DISTINCT sample_variant_{1}.variant_id, sample_variant_{1}.bin, sample_variant_{1}.chr, sample_variant_{1}.pos, sample_variant_{1}.ref, sample_variant_{1}.alt, \
                variant_{1}.is_transition, \
                variant_{1}.sample_list \
            FROM sample_variant_{1} INNER JOIN variant_{1} ON sample_variant_{1}.variant_id=variant_{1}.id \
            WHERE sample_variant_{1}.sample_id IN ({2}) \
            ON CONFLICT (variant_id, transcript_pk_field_uid, transcript_pk_value) DO NOTHING;"
        execute(query.format(w_table, db_ref_suffix, ','.join([str(i) for i in sample_ids])))
        # Complete sample-variant's associations
        for sid in sample_ids:
            execute("UPDATE {0} SET s{2}_gt=_sub.genotype, s{2}_dp=_sub.depth FROM (SELECT variant_id, genotype, depth FROM sample_variant_{1} WHERE sample_id={2}) AS _sub WHERE {0}.variant_id=_sub.variant_id".format(w_table, db_ref_suffix, sid))

        query = "UPDATE {0} SET \
            is_variant=(CASE WHEN ref<>alt THEN True ELSE False END), \
            sample_tcount=array_length(sample_tlist,1), \
            sample_alist=array_intersect(sample_tlist, array[{1}]), \
            sample_acount=array_length(array_intersect(sample_tlist, array[{1}]),1), \
            depth=GREATEST({2})"
        execute(query.format(w_table, ",".join([str(i) for i in sample_ids]), ", ".join(["s{}_dp".format(i) for i in sample_ids])))
        # Create indexes
        # FIXME : do we need to create index on boolean fields ? Is partition a better way to do for low cardinality fields : http://www.postgresql.org/docs/9.1/static/ddl-partitioning.html
        # query = "CREATE INDEX {0}_idx_ann ON {0} USING btree (annotated);".format(w_table)
        query = "CREATE INDEX {0}_idx_vid ON {0} USING btree (variant_id);".format(w_table)
        query += "CREATE INDEX {0}_idx_var ON {0} USING btree (bin, chr, pos, transcript_pk_field_uid, transcript_pk_value);".format(w_table)
        query += "CREATE INDEX {0}_idx_trx ON {0} USING btree (transcript_pk_field_uid, transcript_pk_value);".format(w_table)
        query += "".join(["CREATE INDEX {0}_idx_s{1}_gt ON {0} USING btree (s{1}_gt);".format(w_table, i) for i in sample_ids])
        query += "".join(["CREATE INDEX {0}_idx_s{1}_dp ON {0} USING btree (s{1}_dp);".format(w_table, i) for i in sample_ids])
        execute(query)
        # Update count stat of the analysis
        query = "UPDATE analysis SET total_variants=(SELECT COUNT(*) FROM {} WHERE is_variant), status='ANNOTATING' WHERE id={}".format(w_table, analysis_id)
        execute(query)
        # Update working table by computing annotation
        self.update_working_table(analysis_id, sample_ids, field_uids, dbs_uids, filter_ids, attributes)
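
Both create_working_table and the VCF import queries above rely on an array_intersect() SQL function that stock PostgreSQL does not provide, so it has to be created in the database schema beforehand. A possible definition, shown here as a hedged sketch rather than the project's actual migration code, is:

# Hypothetical one-off schema statement defining array_intersect();
# the real Regovar schema may define it differently.
execute("""
    CREATE OR REPLACE FUNCTION array_intersect(anyarray, anyarray)
    RETURNS anyarray LANGUAGE sql IMMUTABLE AS $$
        SELECT ARRAY(SELECT UNNEST($1) INTERSECT SELECT UNNEST($2));
    $$;
""")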