Example #1
0
    def process(self, params={}):
        """Consume messages from the Genodata RabbitMQ queue until stopped.

        Stores one ``DataManager`` per entity type on ``self._MANAGERS``,
        opens a blocking pika connection with the credentials, host and port
        taken from ``settings``, declares the queue named by the
        ``GENODATA_QUEUE`` environment variable and registers
        ``self.consume`` as its callback. The call then blocks inside
        ``start_consuming()``.

        ``params`` is not used by this method.
        """
        # NOTE(review): presumably read by self.consume -- that method is not
        # visible from here, confirm before renaming.
        self._MANAGERS = {
            'variation': variation_components.DataManager(),
            'treatment': treatment_components.DataManager(),
            'trait': trait_components.DataManager(),
            'publication': publication_components.DataManager(),
            'gene': gene_components.DataManager(),
            'exon': exon_components.DataManager(),
            'disease': disease_components.DataManager(),
            'chromosome': chromosome_components.DataManager(),
            'drug': drug_components.DataManager(),
        }

        # Read the queue name once so declare and consume cannot disagree.
        queue_name = os.environ.get('GENODATA_QUEUE')

        print("[x] RECEIVING DATA")
        credentials = pika.PlainCredentials(settings.RABBITMQ_USER,
                                            settings.RABBITMQ_PASS)
        connection = pika.BlockingConnection(
            pika.ConnectionParameters(settings.RABBITMQ_HOST,
                                      settings.RABBITMQ_PORT, '/',
                                      credentials))
        try:
            channel = connection.channel()
            channel.queue_declare(queue=queue_name)
            print("[*] Waiting for data. To exit press CTRL+C")
            channel.basic_consume(self.consume,
                                  queue=queue_name,
                                  no_ack=True)
            channel.start_consuming()
        finally:
            # start_consuming() only returns through an exception (CTRL+C
            # included); release the broker connection on every exit path.
            if connection.is_open:
                connection.close()
Example #2
0
    def process(self, params={}):
        """Recompute ``Gene.is_good_quality`` for every gene.

        The flag is first cleared on all genes that currently have it set.
        Genes are then read in pages of 5000; a gene is flagged as good
        quality when the data returned by the gene ``DataManager`` contains a
        non-``None`` ``'value'`` for each of ``name``, ``start``, ``end`` and
        ``chromosome``. One progress line is printed per non-empty page.

        ``params`` is not used by this method.
        """
        required_fields = ('name', 'start', 'end', 'chromosome')
        batch_size = 5000

        print('[***] Starting')
        # Reset this field
        Gene.objects.filter(is_good_quality=True).update(is_good_quality=False)

        manager = gene_components.DataManager()
        start = 0

        while True:
            # An explicit ordering keeps page boundaries stable between
            # queries; slicing an unordered queryset may repeat or miss rows.
            records = list(
                Gene.objects.order_by('pk')[start:start + batch_size])
            if not records:
                break
            # Advance by exactly one page. The slice end is exclusive, so the
            # former "end + 1" silently skipped one gene per page.
            start += batch_size

            ids = []
            for gene in records:
                data = manager.get(gene.code)
                if not data:
                    # Nothing stored for this gene: it cannot be good quality.
                    continue
                if all(name in data and data[name]['value'] is not None
                       for name in required_fields):
                    ids.append(gene.id)

            # Update database
            Gene.objects.filter(pk__in=ids).update(is_good_quality=True)
            print('[***] %s is good quality ---- DONE' % (len(ids)))
0
    def process(self, params={}):
        """Import field values from a gzip file containing one JSON array per line.

        ``params['_FILE']`` is the path of the gzip file. Each line is parsed
        with ``json.loads``; lines holding at least five items are read as
        ``[source, entity, record, field, value]`` and written with
        ``put(record, {field: value}, source=source)`` on the ``DataManager``
        registered for ``entity``. Lines naming an entity without a manager
        are printed but not stored. Whatever ``put`` returns is printed as
        the list of fields that do not exist, when it is non-empty.

        A failure while storing a single line is reported and the import
        continues. Errors from opening the file or from parsing a line
        propagate to the caller.
        """
        managers = {
            'variation': variation_components.DataManager(),
            'treatment': treatment_components.DataManager(),
            'trait': trait_components.DataManager(),
            'publication': publication_components.DataManager(),
            'gene': gene_components.DataManager(),
            'exon': exon_components.DataManager(),
            'disease': disease_components.DataManager(),
            'chromosome': chromosome_components.DataManager(),
            'drug': drug_components.DataManager(),
        }

        filename = params.get('_FILE')
        if not filename:
            print("[Error] Expected gzip text file to import, empty given")
            return

        print("[x] RECEIVING DATA")
        with gzip.open(filename, 'r') as f:
            for line in f:
                data = json.loads(line)
                if len(data) < 5:
                    continue
                source, entity, record, field, value = (
                    data[0], data[1], data[2], data[3], data[4])
                print("Entity: %s, Record: %s, Field: %s, Value: %s" % (
                    entity, record, field, value))

                field_not_exist = []
                try:
                    manager = managers.get(entity)
                    if manager is not None:
                        field_not_exist = manager.put(
                            record, {field: value}, source=source)
                except Exception as e:
                    # Best effort: one bad record must not abort the import,
                    # but it should not disappear without a trace either.
                    print("[Error] Could not store %s %s.%s: %s" % (
                        entity, record, field, e))
                if field_not_exist:
                    print("[**] Fields are not exist in Genodata: %s" % (
                        field_not_exist))
Example #4
0
    def process(self, params={}):
        """Publish every gene to the RabbitMQ queue ``settings.GENOME_QUEUE``.

        Genes are read in pages of 5000. For each gene the data returned by
        the gene ``DataManager`` is reduced to a ``values`` dict holding:

        * the plain fields (``synonyms``, ``effects``, ``start``, ``end``,
          ``num_exon``, ``chromosome``, ``protein_product``, ``description``)
          and ``publications`` copied as they are;
        * ``disgenet-diseases``: the disgenet entries plus the gwas and
          ctdbase entries rewritten as ``disease`` / ``pubmedid`` /
          ``sentence`` dicts;
        * ``associated_diseases``: the ``disease`` value of every entry;
        * ``associated_publications``: ``pmid`` / ``title`` of every
          publication plus the raw ``gwas-publications`` entries.

        Genes with a non-empty ``values`` are sent as
        ``['gene', code, values]`` through ``self.publish_to_queue``, one call
        per page. A gene whose data cannot be processed is reported and
        skipped.

        ``params`` is not used by this method.
        """
        # DECLARE VARIABLE
        GENOME_QUEUE = settings.GENOME_QUEUE
        RABBITMQ_HOST = settings.RABBITMQ_HOST
        RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
        batch_size = 5000

        # Fields copied into the payload unchanged.
        plain_fields = ('synonyms', 'effects', 'start', 'end', 'num_exon',
                        'chromosome', 'protein_product', 'description')
        # Disease sources that get rewritten, mapped to the key that holds
        # the text published as 'sentence'.
        sentence_keys = {
            'gwas-diseases': 'sentence',
            'ctdbase-diseases': 'evidence',
        }
        handled_fields = plain_fields + (
            'disgenet-diseases', 'gwas-diseases', 'ctdbase-diseases',
            'publications', 'gwas-publications')

        # Starting
        print("[x] Publish data to rabbitmq")
        ##########################
        ## Gene
        print("[***] Publish GENE data to rabbitmq")
        gene_manager = gene_components.DataManager()
        start = 0
        is_done = False
        while not is_done:
            end = start + batch_size
            print('start: %s, end: %s' % (start, end))
            # An explicit ordering keeps page boundaries stable between
            # queries; slicing an unordered queryset may repeat or miss rows.
            genes = list(Gene.objects.order_by('pk')[start:end])
            # The slice end is exclusive, so the next page starts at ``end``;
            # the former "end + 1" skipped one gene per page.
            start = end
            # NOTE: the terminating empty page is still handed to
            # publish_to_queue, exactly as before.
            is_done = not genes

            batch = []
            for gene in genes:
                try:
                    data = gene_manager.get(gene.code)
                    values = {}
                    arr_disease = []
                    asso_disease = []
                    asso_pub = []
                    for field, entry in data.items():
                        if field not in handled_fields:
                            continue
                        value = entry['value']
                        if value is None:
                            continue

                        if field in plain_fields:
                            values[field] = value
                        elif field == 'disgenet-diseases':
                            arr_disease.extend(value)
                            asso_disease.extend(
                                [item['disease'] for item in value])
                        elif field in sentence_keys:
                            sentence_key = sentence_keys[field]
                            try:
                                for k in value:
                                    arr_disease.append({
                                        'disease': k.get('disease', ''),
                                        'pubmedid': k.get('pmid', ''),
                                        'sentence': k.get(sentence_key, ''),
                                    })
                            except Exception as e:
                                # Best effort: keep what was converted so far.
                                print("[Warning] Malformed %s for gene %s: %s"
                                      % (field, gene.code, e))
                            asso_disease.extend(
                                [item['disease'] for item in value])
                        elif field == 'publications':
                            values[field] = value
                            try:
                                for k in value:
                                    asso_pub.append({
                                        'pmid': k['pmid'],
                                        'title': k['title'],
                                    })
                            except Exception as e:
                                # Best effort: keep what was converted so far.
                                print("[Warning] Malformed %s for gene %s: %s"
                                      % (field, gene.code, e))
                        else:  # 'gwas-publications'
                            asso_pub.extend(value)

                    # Aggregates are attached once, after every field has
                    # contributed to them.
                    if arr_disease:
                        values['disgenet-diseases'] = arr_disease
                    if asso_disease:
                        values['associated_diseases'] = asso_disease
                    if asso_pub:
                        values['associated_publications'] = asso_pub

                    if values:
                        batch.append(['gene', gene.code, values])
                except Exception as e:
                    # One bad gene must not stop the export, but say so.
                    print("[Error] Skipping gene %s: %s" % (gene.code, e))

            # Publish rabbitMQ
            self.publish_to_queue(batch, GENOME_QUEUE, RABBITMQ_HOST,
                                  RABBITMQ_PORT)
        print("[***] DONE gene")

        print("[x] Sent data to RabbitMQ")
0
    def process(self, params={}):
        """Publish good-quality genes to the ``genome-browser-gene`` queue.

        Genes with ``is_good_quality=True`` are read in pages of 5000. Each
        gene becomes a dict with ``version``, ``name`` and, taken from the
        gene ``DataManager`` data: ``core_attributes`` (chromosome, start,
        end, synonyms), ``publications``, ``protein_product``,
        ``description``, ``disgenet-diseases`` (disgenet entries plus gwas and
        ctdbase entries rewritten as ``disease`` / ``pubmedid`` /
        ``sentence``), ``associated_diseases`` and
        ``associated_publications``. Optional keys are only present when
        their source value is non-empty.

        If reading a gene's data fails part-way, the error is reported and
        the gene is still published with whatever keys were filled in before
        the failure. Each page is sent with ``self.publish_to_queue``.

        ``params`` is not used by this method.
        """
        # DECLARE VARIABLE
        GENOME_QUEUE = 'genome-browser-gene'
        RABBITMQ_HOST = settings.RABBITMQ_HOST
        RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
        batch_size = 5000

        # Disease sources that get rewritten, paired with the key that holds
        # the text published as 'sentence'.
        rewritten_sources = (
            ('gwas-diseases', 'sentence'),
            ('ctdbase-diseases', 'evidence'),
        )

        # Starting
        print("[x] Publish data to rabbitmq")
        ##########################
        ## Gene
        manager = gene_components.DataManager()
        start = 0
        is_done = False
        while not is_done:
            end = start + batch_size
            # An explicit ordering keeps page boundaries stable between
            # queries; slicing an unordered queryset may repeat or miss rows.
            genes = list(
                Gene.objects.filter(is_good_quality=True)
                .order_by('pk')[start:end])
            # The slice end is exclusive, so the next page starts at ``end``;
            # the former "end + 1" skipped one gene per page.
            start = end
            # NOTE: the terminating empty page is still handed to
            # publish_to_queue, exactly as before.
            is_done = not genes

            batch = []
            for gene in genes:
                payload = {'version': '0.1', 'name': gene.code}
                try:
                    data = manager.get(gene.code)
                    arr_disease = []
                    asso_disease = []
                    asso_pub = []

                    synonyms = data['synonyms']['value']
                    payload['core_attributes'] = {
                        'chromosome': data['chromosome']['value'],
                        'start': data['start']['value'],
                        'end': data['end']['value'],
                        'synonyms': synonyms if synonyms is not None else [],
                    }

                    publications = data['publications']['value']
                    if publications:
                        payload['publications'] = publications
                    if data['protein_product']['value']:
                        payload['protein_product'] = (
                            data['protein_product']['value'])
                    if data['description']['value']:
                        payload['description'] = data['description']['value']

                    # disease
                    disgenet = data['disgenet-diseases']['value']
                    if disgenet:
                        arr_disease.extend(disgenet)
                        asso_disease.extend(
                            [item['disease'] for item in disgenet])
                    # Each source is read from its own field. The ctdbase
                    # branch used to iterate the gwas entries by mistake,
                    # which duplicated gwas data and dropped ctdbase data.
                    for source, sentence_key in rewritten_sources:
                        entries = data[source]['value']
                        if not entries:
                            continue
                        for k in entries:
                            arr_disease.append({
                                'disease': k.get('disease', ''),
                                'pubmedid': k.get('pmid', ''),
                                'sentence': k.get(sentence_key, ''),
                            })
                        asso_disease.extend(
                            [item['disease'] for item in entries])

                    if arr_disease:
                        payload['disgenet-diseases'] = arr_disease
                    if asso_disease:
                        payload['associated_diseases'] = asso_disease

                    # publication
                    if publications:
                        for k in publications:
                            asso_pub.append({
                                'pmid': k.get('pmid', ''),
                                'title': k.get('title', ''),
                            })
                    if data['gwas-publications']['value']:
                        asso_pub.extend(data['gwas-publications']['value'])
                    if asso_pub:
                        payload['associated_publications'] = asso_pub

                except Exception as e:
                    # The partially built payload is still published below;
                    # report the failure instead of hiding it.
                    print("[Error] Incomplete data for gene %s: %s" % (
                        gene.code, e))
                batch.append(payload)
            # Publish rabbitMQ
            self.publish_to_queue(batch, GENOME_QUEUE, RABBITMQ_HOST,
                                  RABBITMQ_PORT)
            print("[***] DONE gene")
0
    def process(self, params={}):
        """Print the ten variations and ten genes with the most populated fields.

        For every ``Variation`` and every ``Gene`` the matching
        ``DataManager`` is queried by ``code``. Each watched field whose
        ``'value'`` is neither ``''`` nor ``None`` is printed and counted
        against the record's code. The ten codes with the highest counts are
        then printed as ``(code, count)`` pairs, once for variations and once
        for genes.

        ``params`` is not used by this method.
        """
        variation_keys = frozenset([
            'synonyms', 'name', 'is_somatic', 'minor_allele_frequency',
            'evidence_attributes', 'ancestral_allele', 'minor_allele_count',
            'clinic_significance', 'minor_allele', 'effects', 'chromosome',
            'publications', 'genotype_frequency', 'hgvs', 'allele',
            'allele_frequency', 'associated_disease', 'attribute', 'var_type',
            'var_property', 'var_disease', 'reversed', 'gwas-effects',
            '1000-genomes', 'disgenet-diseases', 'genename', 'allele_string',
            'consequence_types', 'ensembl-id', 'vcf_U5', 'vcf_ASS',
            'vcf_DSS', 'vcf_INT', 'vcf_R3', 'vcf_R5', 'vcf_OTH', 'vcf_CFL',
            'vcf_ASP', 'vcf_MUT', 'vcf_VLD', 'vcf_G5A', 'vcf_G5', 'vcf_HD',
            'vcf_GNO', 'vcf_KGPhase1', 'vcf_KGPhase3', 'vcf_CDA', 'vcf_LSD',
            'vcf_MTP', 'vcf_OM', 'vcf_NOC', 'vcf_WTD', 'vcf_NOV', 'vcf_CAF',
            'vcf_COMMON', 'vcf_CLNHGVS', 'vcf_CLNALLE', 'vcf_CLNSRC',
            'vcf_CLNORIGIN', 'vcf_CLNSRCID', 'vcf_CLNSIG', 'vcf_CLNDSDB',
            'vcf_CLNDSDBID', 'vcf_CLNDBN', 'vcf_CLNREVSTAT', 'vcf_CLNACC',
            'vcf_REF', 'vcf_ALT', 'vcf_RS', 'vcf_RSPOS', 'vcf_RV', 'vcf_VP',
            'vcf_GENEINFO', 'vcf_dbSNPBuildID', 'vcf_SAO', 'vcf_SSR',
            'vcf_WGT', 'vcf_VC', 'vcf_PM', 'vcf_TPA', 'vcf_PMC', 'vcf_S3D',
            'vcf_SLO', 'vcf_NSF', 'vcf_NSM', 'vcf_NSN', 'vcf_SYN',
            'vcf_U3',
        ])
        gene_keys = frozenset([
            'geneid', 'chromosome', 'start', 'end', 'num_exon',
            'protein_product', 'description', 'associated_disease', 'synonyms',
            'publications', 'havana_gene', 'biotype', 'is_reversed',
            'ctdbase-diseases', 'disgenet-diseases', 'id', 'name',
        ])

        # .iterator() streams the rows instead of caching the whole table on
        # the queryset; each row is only needed once.
        variation_counts = self._count_populated_fields(
            Variation.objects.all().iterator(),
            variation_components.DataManager(), variation_keys)
        print("[RSNUMBER] Top 10")
        # most_common ranks by count. The previous sort ordered the pairs by
        # code, which did not match the "Top 10" label.
        print(variation_counts.most_common(10))

        gene_counts = self._count_populated_fields(
            Gene.objects.all().iterator(),
            gene_components.DataManager(), gene_keys)
        print("[GENE] Top 10")
        print(gene_counts.most_common(10))

    def _count_populated_fields(self, records, manager, keys):
        """Count, per record code, the ``keys`` fields that hold a value.

        ``records`` is an iterable of objects with a ``code`` attribute,
        ``manager`` provides ``get(code)`` and ``keys`` is the set of field
        names to look at. Every populated value is printed as it is found.
        Returns a ``Counter`` mapping code to number of populated fields.
        """
        counts = Counter()
        for record in records:
            try:
                vol = manager.get(record.code)
                if vol is None:
                    continue
                for key, entry in vol.items():
                    if key not in keys:
                        continue
                    value = entry.get('value', '')
                    if value not in ['', None]:
                        print("[Value] %s" % (value,))
                        counts[record.code] += 1
            except Exception as e:
                # Keep scanning the remaining records, but report the failure.
                print("[Error] Could not inspect %s: %s" % (record.code, e))
        return counts