Beispiel #1
0
    def process(self, params={}):
        """Consume messages from the Genodata RabbitMQ queue until stopped.

        Creates one ``DataManager`` per entity type and stores the mapping on
        ``self._MANAGERS`` (presumably read by ``self.consume`` -- confirm
        against that method), then declares the queue named by the
        ``GENODATA_QUEUE`` environment variable and blocks in
        ``start_consuming()``, handing every delivery to ``self.consume``.

        Args:
            params: Not used by this method.

        Raises:
            Whatever ``pika`` raises while connecting or consuming; a
            ``KeyboardInterrupt`` (CTRL+C) also propagates to the caller after
            the connection has been closed.
        """
        self._MANAGERS = {
            'variation': variation_components.DataManager(),
            'treatment': treatment_components.DataManager(),
            'trait': trait_components.DataManager(),
            'publication': publication_components.DataManager(),
            'gene': gene_components.DataManager(),
            'exon': exon_components.DataManager(),
            'disease': disease_components.DataManager(),
            'chromosome': chromosome_components.DataManager(),
            'drug': drug_components.DataManager(),
        }
        # Read the queue name once so declare and consume cannot disagree.
        queue_name = os.environ.get('GENODATA_QUEUE')

        print("[x] RECEIVING DATA")
        credentials = pika.PlainCredentials(settings.RABBITMQ_USER,
                                            settings.RABBITMQ_PASS)
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(settings.RABBITMQ_HOST,
                                      settings.RABBITMQ_PORT, '/',
                                      credentials))
        try:
            channel = connection.channel()
            channel.queue_declare(queue=queue_name)
            print("[*] Waiting for data. To exit press CTRL+C")
            channel.basic_consume(self.consume,
                                  queue=queue_name,
                                  no_ack=True)
            channel.start_consuming()
        finally:
            # start_consuming() only returns via an exception or CTRL+C;
            # release the broker connection instead of leaking it.
            if connection.is_open:
                connection.close()
Beispiel #2
0
    def process(self, params={}):
        """Recompute the ``is_good_quality`` flag of every ``Variation``.

        First clears the flag on all rows that currently have it, then walks
        the table in pages of 5000 rows. A variation is flagged as good
        quality when the data returned by the variation ``DataManager`` holds
        a non-``None`` ``'value'`` for every required field.

        Args:
            params: Not used by this method.
        """
        required_fields = (
            '1000-genomes', 'allele_string', 'chromosome', 'genename',
            'vcf_RSPOS',
        )
        batch_size = 5000

        print('[***] Starting')
        # Reset this field
        Variation.objects.filter(is_good_quality=True).update(
            is_good_quality=False)

        variation_manager = variation_components.DataManager()
        # Paging by slice is only reliable with an explicit, stable ordering.
        queryset = Variation.objects.order_by('pk')
        start = 0

        while True:
            records = list(queryset[start:start + batch_size])
            if not records:
                break
            # Slices exclude the end index, so the next page starts exactly
            # at ``start + batch_size``; ``end + 1`` skipped one row per page.
            start += batch_size

            ids = []
            for var in records:
                data = variation_manager.get(var.code)
                if data is None:
                    # No stored data for this code: cannot be good quality.
                    continue
                if all(name in data and data[name]['value'] is not None
                       for name in required_fields):
                    ids.append(var.id)

            # Update database
            Variation.objects.filter(pk__in=ids).update(is_good_quality=True)
            print('[***] %s is good quality ---- DONE' % (len(ids)))
0
    def process(self, params={}):
        """Import field values from a gzip file containing one JSON array per line.

        Each line must decode to a sequence of at least five items, read as
        ``[source, entity, record, field, value]``. The entity selects a
        ``DataManager``; its ``put(record, {field: value}, source=source)``
        result is treated as the list of fields that were not accepted and is
        printed when non-empty. Lines with fewer than five items and unknown
        entities are ignored.

        Args:
            params: Mapping; ``params['_FILE']`` is the path of the gzip file.
                When it is missing or empty an error message is printed and
                nothing else happens.

        Raises:
            Errors from opening/reading the file or decoding a line propagate
            unchanged. Errors raised while storing a single value are reported
            and the import continues with the next line.
        """
        _filename = params.get('_FILE')
        if not _filename:
            print("[Error] Expected gzip text file to import, empty given")
            return

        # Built only once a file is known to be present.
        _managers = {
            'variation': variation_components.DataManager(),
            'treatment': treatment_components.DataManager(),
            'trait': trait_components.DataManager(),
            'publication': publication_components.DataManager(),
            'gene': gene_components.DataManager(),
            'exon': exon_components.DataManager(),
            'disease': disease_components.DataManager(),
            'chromosome': chromosome_components.DataManager(),
            'drug': drug_components.DataManager(),
        }

        print("[x] RECEIVING DATA")
        with gzip.open(_filename, 'r') as f:
            for line in f:
                data = json.loads(line)
                if len(data) < 5:
                    continue
                source, entity, record, field, value = data[:5]
                print("Entity: %s, Record: %s, Field: %s, Value: %s" % (
                    entity, record, field, value))

                field_not_exist = []
                try:
                    manager = _managers.get(entity)
                    if manager is not None:
                        field_not_exist = manager.put(
                            record, {field: value}, source=source)
                except Exception as e:
                    # Best effort: one bad value must not stop the import,
                    # but the failure should be visible in the output.
                    print("[!] Failed to store %s %s (%s): %r" % (
                        entity, record, field, e))
                if field_not_exist:
                    print("[**] Fields are not exist in Genodata: %s" % (
                        field_not_exist))
Beispiel #4
0
    def process(self, params={}):
        """Publish good-quality variations to the ``user-variation`` queue.

        Walks ``Variation`` rows with ``is_good_quality=True`` in pages of
        5000, builds one dict per variation from the data returned by the
        ``DataManager`` and hands each page to ``self.publish_to_queue``.

        Building a record is best effort: if anything raises while reading
        the stored data, the record is still published with whatever keys
        were filled in before the failure. The number of such records is
        printed per page.

        Args:
            params: Not used by this method.
        """
        # DECLARE VARIABLE
        NAME_QUEUE = 'user-variation'
        RABBITMQ_HOST = settings.RABBITMQ_HOST
        RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
        BATCH_SIZE = 5000

        # Starting
        print("[x] Publish data to rabbitmq")
        ##########################
        ## Variation
        manager = components.DataManager()
        # Paging by slice is only reliable with an explicit, stable ordering.
        queryset = Variation.objects.filter(
            is_good_quality=True).order_by('pk')
        start = 0
        while True:
            variation = list(queryset[start:start + BATCH_SIZE])
            if not variation:
                # Nothing left; do not publish an empty batch.
                break
            # Slices exclude the end index, so the next page starts exactly
            # at ``start + BATCH_SIZE``; ``end + 1`` skipped one row per page.
            start += BATCH_SIZE

            x = []
            incomplete = 0
            for var in variation:
                y = {'version': '0.1', 'rsnumber': var.code}
                try:
                    data = manager.get(var.code)
                    arr_disease = []
                    y['chromosome'] = data['chromosome']['value']
                    y['position'] = data['vcf_RSPOS']['value']
                    y['science_filter'] = ''
                    y['genes'] = data['genename']['value']

                    if data['publications']['value']:
                        y['publications'] = data['publications']['value']
                    if data['gwas-effects']['value']:
                        y['effects'] = [
                            {
                                'genotype': eff['genotype'],
                                'odd_ratio': eff['odd_ratio'],
                                'effect': eff['effect'],
                            }
                            for eff in data['gwas-effects']['value']
                        ]
                    # disease
                    if data['disgenet-diseases']['value']:
                        arr_disease.extend(data['disgenet-diseases']['value'])
                    if data['gwas-diseases']['value']:
                        for k in data['gwas-diseases']['value']:
                            arr_disease.append({
                                'disease': k.get('disease', ''),
                                'pubmedid': k.get('pmid', ''),
                                'sentence': k.get('sentence', ''),
                            })
                    if arr_disease:
                        y['diseases'] = arr_disease
                except Exception:
                    # Deliberately best effort: keep the partial record but
                    # count it so the loss is not completely silent.
                    incomplete += 1
                x.append(y)

            if incomplete:
                print("[!] %s of %s records are incomplete" % (
                    incomplete, len(x)))

            # Publish rabbitMQ
            self.publish_to_queue(x, NAME_QUEUE, RABBITMQ_HOST, RABBITMQ_PORT)
            print("[***] DONE variation")
Beispiel #5
0
    def process(self, params={}):
        """Publish variations to the ``sequence-viewer-variation`` queue.

        Walks all ``Variation`` rows in pages of 5000. A variation is
        published only when its stored data has a non-``None`` ``'value'``
        for ``chromosome``, ``genename`` and ``vcf_RSPOS``. Each published
        item is a list ``['variation', code, {...}]`` whose dict carries the
        chromosome, gene name, position and the associated diseases gathered
        from the ``disgenet-diseases`` and ``gwas-diseases`` fields. Pages
        that yield no items are not published.

        Args:
            params: Not used by this method.
        """
        # DECLARE VARIABLE
        SEQUENCE_QUEUE = 'sequence-viewer-variation'
        RABBITMQ_HOST = settings.RABBITMQ_HOST
        RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
        BATCH_SIZE = 5000

        # Starting
        print("[x] Publish data to rabbitmq")

        ##########################
        ## Variation
        variation_manager = variation_components.DataManager()
        fields = ('chromosome', 'genename', 'vcf_RSPOS')
        # Paging by slice is only reliable with an explicit, stable ordering.
        queryset = Variation.objects.order_by('pk')
        start = 0

        while True:
            variation = list(queryset[start:start + BATCH_SIZE])
            if not variation:
                break
            # Slices exclude the end index, so the next page starts exactly
            # at ``start + BATCH_SIZE``; ``end + 1`` skipped one row per page.
            start += BATCH_SIZE

            x = []
            for var in variation:
                data = variation_manager.get(var.code)
                if data is None:
                    continue
                if not all(name in data and data[name]['value'] is not None
                           for name in fields):
                    continue

                # disease -- both fields are optional; a missing one must not
                # abort the whole export with a KeyError.
                arr_disease = []
                disgenet = (data.get('disgenet-diseases') or {}).get('value')
                if disgenet:
                    arr_disease.extend(disgenet)
                gwas = (data.get('gwas-diseases') or {}).get('value')
                if gwas:
                    for k in gwas:
                        arr_disease.append({
                            'disease': k.get('disease', ''),
                            'pubmedid': k.get('pmid', ''),
                            'sentence': k.get('sentence', ''),
                        })

                x.append([
                    'variation',
                    var.code,
                    {
                        'chromosome': data['chromosome']['value'],
                        'genename': data['genename']['value'],
                        'position': data['vcf_RSPOS']['value'],
                        'associated_diseases': arr_disease,
                    },
                ])

            if x:
                print("[***] starting publish to rabbitMQ")
                # Publish rabbitMQ
                self.publish_to_queue(x, SEQUENCE_QUEUE, RABBITMQ_HOST,
                                      RABBITMQ_PORT)
        print("[***] DONE variation")
Beispiel #6
0
    def process(self, params={}):
        """Publish good-quality variations to the ``genome-browser-variation`` queue.

        Walks ``Variation`` rows with ``is_good_quality=True`` in pages of
        5000, builds one dict per variation (core attributes, gene name,
        1000-genomes data, publications, GWAS effects with derived ``risk``
        and ``evidences`` texts, genotype frequency, diseases and associated
        publications) and hands each page to ``self.publish_to_queue``.

        Building a record is best effort: if anything raises while reading
        the stored data, the record is still published with whatever keys
        were filled in before the failure. The number of such records is
        printed per page.

        Args:
            params: Not used by this method.
        """
        # DECLARE VARIABLE
        GENOME_QUEUE = 'genome-browser-variation'
        RABBITMQ_HOST = settings.RABBITMQ_HOST
        RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
        BATCH_SIZE = 5000

        # Starting
        print("[x] Publish data to rabbitmq")
        ##########################
        ## Variation
        manager = variation_components.DataManager()
        # Paging by slice is only reliable with an explicit, stable ordering.
        queryset = Variation.objects.filter(
            is_good_quality=True).order_by('pk')
        start = 0
        while True:
            variation = list(queryset[start:start + BATCH_SIZE])
            if not variation:
                # Nothing left; do not publish an empty batch.
                break
            # Slices exclude the end index, so the next page starts exactly
            # at ``start + BATCH_SIZE``; ``end + 1`` skipped one row per page.
            start += BATCH_SIZE

            x = []
            incomplete = 0
            for var in variation:
                y = {'version': '0.1', 'rsnumber': var.code}
                try:
                    data = manager.get(var.code)
                    arr_disease = []
                    asso_disease = []
                    asso_pub = []
                    y['core_attributes'] = {
                        'chromosome': data['chromosome'].get('value', ''),
                        'position': data['vcf_RSPOS'].get('value', ''),
                        'allele_string': data['allele_string'].get('value', ''),
                        'synonyms': data['synonyms'].get('value', ''),
                    }
                    y['genename'] = data['genename']['value']
                    y['1000-genomes'] = data['1000-genomes']['value']
                    if data['publications']['value']:
                        y['publications'] = data['publications']['value']
                    if data['gwas-effects']['value']:
                        y['effects'] = []
                        for eff in data['gwas-effects']['value']:
                            # Copy so the derived keys are not written back
                            # into the dicts owned by the data manager.
                            effe = dict(eff)
                            if eff.get('effect', ''):
                                effe['risk'] = 'Increased risk of %s' % (
                                    eff['effect'])
                            else:
                                effe['risk'] = ''
                            if eff.get('initial', ''):
                                effe['evidences'] = 'Initial: %s' % (
                                    eff['initial'])
                                if eff.get('replication', ''):
                                    effe['evidences'] += (
                                        ', replication: %s' % (
                                            eff['replication']))
                            else:
                                effe['evidences'] = ''
                            y['effects'].append(effe)
                    if data['genotype_frequency']['value']:
                        y['genotype_frequency'] = (
                            data['genotype_frequency']['value'])

                    # disease
                    if data['disgenet-diseases']['value']:
                        arr_disease.extend(data['disgenet-diseases']['value'])
                        asso_disease.extend(
                            item['disease']
                            for item in data['disgenet-diseases']['value'])
                    if data['gwas-diseases']['value']:
                        for k in data['gwas-diseases']['value']:
                            arr_disease.append({
                                'disease': k.get('disease', ''),
                                'pubmedid': k.get('pmid', ''),
                                'sentence': k.get('sentence', ''),
                            })
                        asso_disease.extend(
                            item['disease']
                            for item in data['gwas-diseases']['value'])
                    if arr_disease:
                        y['disgenet-diseases'] = arr_disease
                    if asso_disease:
                        y['associated_diseases'] = asso_disease

                    # publication
                    if data['publications']['value']:
                        for k in data['publications']['value']:
                            asso_pub.append({
                                'pmid': k.get('pmid', ''),
                                'title': k.get('title', ''),
                            })
                    if data['gwas-publications']['value']:
                        asso_pub.extend(data['gwas-publications']['value'])
                    if asso_pub:
                        y['associated_publications'] = asso_pub

                except Exception:
                    # Deliberately best effort: keep the partial record but
                    # count it so the loss is not completely silent.
                    incomplete += 1
                x.append(y)

            if incomplete:
                print("[!] %s of %s records are incomplete" % (
                    incomplete, len(x)))

            # Publish rabbitMQ
            self.publish_to_queue(x, GENOME_QUEUE, RABBITMQ_HOST, RABBITMQ_PORT)
            print("[***] DONE variation")
Beispiel #7
0
    def process(self, params={}):
        """Report which variations and genes have the most populated fields.

        For every ``Variation`` and then every ``Gene``, loads the stored data
        through the matching ``DataManager`` and counts how many of the known
        fields carry a ``'value'`` other than ``''`` or ``None``. Every such
        value is printed, followed by the ten codes with the highest counts
        as ``(code, count)`` pairs.

        Records whose data cannot be read are skipped (best effort); the
        number of skipped records is printed.

        Args:
            params: Not used by this method.
        """

        def count_populated(manager, records, wanted):
            """Return a Counter of populated wanted fields per record code."""
            codes = []
            skipped = 0
            for r in records:
                try:
                    vol = manager.get(r.code)
                    if vol is None:
                        continue
                    for k in vol.keys():
                        if k not in wanted:
                            continue
                        value = vol[k].get('value', '')
                        if value not in ('', None):
                            print("[Value] %s" % (value,))
                            codes.append(r.code)
                except Exception:
                    # Best effort: a broken record must not stop the report.
                    skipped += 1
            if skipped:
                print("[!] %s records skipped because of errors" % (skipped,))
            return Counter(codes)

        # Sets give constant-time membership tests inside the per-field loop.
        variation_keys = frozenset([
            'synonyms', 'name', 'is_somatic', 'minor_allele_frequency',
            'evidence_attributes', 'ancestral_allele', 'minor_allele_count',
            'clinic_significance', 'minor_allele', 'effects', 'chromosome',
            'publications', 'genotype_frequency', 'hgvs', 'allele',
            'allele_frequency', 'associated_disease', 'attribute', 'var_type',
            'var_property', 'var_disease', 'reversed', 'gwas-effects',
            '1000-genomes', 'disgenet-diseases', 'genename', 'allele_string',
            'consequence_types', 'ensembl-id', 'vcf_U5', 'vcf_ASS',
            'vcf_DSS', 'vcf_INT', 'vcf_R3', 'vcf_R5', 'vcf_OTH', 'vcf_CFL',
            'vcf_ASP', 'vcf_MUT', 'vcf_VLD', 'vcf_G5A', 'vcf_G5', 'vcf_HD',
            'vcf_GNO', 'vcf_KGPhase1', 'vcf_KGPhase3', 'vcf_CDA', 'vcf_LSD',
            'vcf_MTP', 'vcf_OM', 'vcf_NOC', 'vcf_WTD', 'vcf_NOV', 'vcf_CAF',
            'vcf_COMMON', 'vcf_CLNHGVS', 'vcf_CLNALLE', 'vcf_CLNSRC',
            'vcf_CLNORIGIN', 'vcf_CLNSRCID', 'vcf_CLNSIG', 'vcf_CLNDSDB',
            'vcf_CLNDSDBID', 'vcf_CLNDBN', 'vcf_CLNREVSTAT', 'vcf_CLNACC',
            'vcf_REF', 'vcf_ALT', 'vcf_RS', 'vcf_RSPOS', 'vcf_RV', 'vcf_VP',
            'vcf_GENEINFO', 'vcf_dbSNPBuildID', 'vcf_SAO', 'vcf_SSR',
            'vcf_WGT', 'vcf_VC', 'vcf_PM', 'vcf_TPA', 'vcf_PMC', 'vcf_S3D',
            'vcf_SLO', 'vcf_NSF', 'vcf_NSM', 'vcf_NSN', 'vcf_SYN',
            'vcf_U3',
        ])
        gene_keys = frozenset([
            'geneid', 'chromosome', 'start', 'end', 'num_exon',
            'protein_product', 'description', 'associated_disease', 'synonyms',
            'publications', 'havana_gene', 'biotype', 'is_reversed',
            'ctdbase-diseases', 'disgenet-diseases', 'id', 'name',
        ])

        rsnumbers = count_populated(variation_components.DataManager(),
                                    Variation.objects.all(), variation_keys)
        print("[RSNUMBER] Top 10")
        # The original sorted by code and took the first ten, which is not a
        # "top 10"; rank by the number of populated fields instead.
        print(rsnumbers.most_common(10))

        genes = count_populated(gene_components.DataManager(),
                                Gene.objects.all(), gene_keys)
        print("[GENE] Top 10")
        print(genes.most_common(10))