def process(self, params={}):
    """Consume Genodata messages from RabbitMQ until interrupted.

    Stores one ``DataManager`` per entity type on ``self._MANAGERS``, opens a
    blocking connection with the credentials, host and port from
    ``settings``, and registers ``self.consume`` as the callback for the
    queue named by the ``GENODATA_QUEUE`` environment variable.  The call
    blocks inside ``start_consuming()``.

    Args:
        params: Not read by this command.
    """
    # presumably read by self.consume to route messages -- confirm there.
    self._MANAGERS = {
        'variation': variation_components.DataManager(),
        'treatment': treatment_components.DataManager(),
        'trait': trait_components.DataManager(),
        'publication': publication_components.DataManager(),
        'gene': gene_components.DataManager(),
        'exon': exon_components.DataManager(),
        'disease': disease_components.DataManager(),
        'chromosome': chromosome_components.DataManager(),
        'drug': drug_components.DataManager(),
    }

    print("[x] RECEIVING DATA")

    # Read the queue name once so declaring and consuming always agree.
    queue_name = os.environ.get('GENODATA_QUEUE')

    credentials = pika.PlainCredentials(settings.RABBITMQ_USER,
                                        settings.RABBITMQ_PASS)
    connection = pika.BlockingConnection(
        pika.ConnectionParameters(settings.RABBITMQ_HOST,
                                  settings.RABBITMQ_PORT, '/', credentials))
    try:
        channel = connection.channel()
        channel.queue_declare(queue=queue_name)

        print("[*] Waiting for data. To exit press CTRL+C")
        channel.basic_consume(self.consume, queue=queue_name, no_ack=True)
        channel.start_consuming()
    finally:
        # Release the socket on CTRL+C or on any consumer error; the
        # exception itself still propagates to the caller.
        if connection.is_open:
            connection.close()
def process(self, params={}):
    """Recompute ``Gene.is_good_quality`` for every gene.

    A gene is flagged as good quality when the gene data manager returns a
    non-``None`` ``value`` for each of ``name``, ``start``, ``end`` and
    ``chromosome``.  All flags are cleared first; genes are then scanned in
    batches and the qualifying ones are flagged again.  The final message
    reports the total number of genes flagged across all batches.

    Args:
        params: Not read by this command.
    """
    required_fields = ('name', 'start', 'end', 'chromosome')
    batch_size = 5000

    print('[***] Starting')

    # Reset this field
    Gene.objects.filter(is_good_quality=True).update(is_good_quality=False)

    manager = gene_components.DataManager()
    total_good = 0
    start = 0
    while True:
        end = start + batch_size
        # Explicit ordering keeps consecutive slices stable; list() runs a
        # single query instead of count() followed by iteration.
        records = list(Gene.objects.order_by('pk')[start:end])
        if not records:
            break
        # The slice end is exclusive, so the next batch starts exactly at
        # `end` (starting at `end + 1` skipped one gene per batch).
        start = end

        ids = []
        for gene in records:
            data = manager.get(gene.code)
            if data is None:
                # Nothing stored for this gene: it cannot be good quality.
                continue
            if all(name in data and data[name]['value'] is not None
                   for name in required_fields):
                ids.append(gene.id)

        # Update database
        if ids:
            Gene.objects.filter(pk__in=ids).update(is_good_quality=True)
        total_good += len(ids)

    print('[***] %s is good quality ---- DONE' % (total_good))
def process(self, params={}):
    """Import Genodata field values from a gzip file of JSON lines.

    Every non-blank line is decoded with ``json.loads``.  Decoded rows with
    at least five items are read as ``[source, entity, record, field,
    value]`` and written with ``put(record, {field: value}, source=source)``
    on the manager registered for ``entity``; rows for unknown entities and
    shorter rows are ignored.  A failing ``put`` is reported and the import
    continues with the next line.  Errors while opening, reading or decoding
    the file propagate to the caller.

    Args:
        params: Mapping; the path of the gzip file is read from ``'_FILE'``.
    """
    _filename = params.get('_FILE')
    if not _filename:
        print("[Error] Expected gzip text file to import, empty given")
        return

    # Built only once a file is known, so a missing argument costs nothing.
    _managers = {
        'variation': variation_components.DataManager(),
        'treatment': treatment_components.DataManager(),
        'trait': trait_components.DataManager(),
        'publication': publication_components.DataManager(),
        'gene': gene_components.DataManager(),
        'exon': exon_components.DataManager(),
        'disease': disease_components.DataManager(),
        'chromosome': chromosome_components.DataManager(),
        'drug': drug_components.DataManager(),
    }

    print("[x] RECEIVING DATA")
    with gzip.open('{}'.format(_filename), 'r') as f:
        for line in f:
            if not line.strip():
                # A blank line is not a record; do not abort the import.
                continue
            data = json.loads(line)
            if len(data) < 5:
                continue
            source, entity, record, field, value = data[:5]
            print("Entity: %s, Record: %s, Field: %s, Value: %s" % (
                entity, record, field, value))

            field_not_exist = []
            try:
                manager = _managers.get(entity)
                if manager is not None:
                    field_not_exist = manager.put(
                        record, {field: value}, source=source)
            except Exception as e:
                # Best effort: one bad row must not stop the import, but it
                # should not vanish silently either.
                print("[!] Failed to import %s/%s field %s: %s" % (
                    entity, record, field, e))
            if field_not_exist:
                # Trailing comma: the value is formatted as one argument
                # even when it is a tuple.
                print("[**] Fields are not exist in Genodata: %s" % (
                    field_not_exist,))
def process(self, params={}):
    """Publish the Genodata attributes of every gene to RabbitMQ.

    Genes are read in batches.  For each gene a record
    ``['gene', code]`` is built, followed by a dict of collected values when
    there are any: the non-``None`` core fields, the combined disease rows
    under ``'disgenet-diseases'``, the disease names under
    ``'associated_diseases'``, and the publications under
    ``'publications'`` / ``'associated_publications'``.  Each non-empty
    batch is sent with ``self.publish_to_queue``.  A gene whose data cannot
    be processed is reported and left out of the batch.

    Args:
        params: Not read by this command.
    """
    # DECLARE VARIABLE
    GENOME_QUEUE = settings.GENOME_QUEUE
    RABBITMQ_HOST = settings.RABBITMQ_HOST
    RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
    batch_size = 5000

    core_fields = frozenset([
        'synonyms', 'effects', 'start', 'end', 'num_exon', 'chromosome',
        'protein_product', 'description',
    ])
    disease_fields = frozenset([
        'disgenet-diseases', 'gwas-diseases', 'ctdbase-diseases',
    ])
    publication_fields = frozenset(['publications', 'gwas-publications'])
    # Where each non-disgenet source keeps its supporting sentence.
    sentence_keys = {'gwas-diseases': 'sentence',
                     'ctdbase-diseases': 'evidence'}

    # Starting
    print("[x] Publish data to rabbitmq")

    ##########################
    ## Gene
    print("[***] Publish GENE data to rabbitmq")
    gene_manager = gene_components.DataManager()
    start = 0
    while True:
        end = start + batch_size
        print('start: %s, end: %s' % (start, end))
        # Explicit ordering keeps consecutive slices stable.
        genes = list(Gene.objects.order_by('pk')[start:end])
        if not genes:
            # Past the last gene: nothing to publish for this slice.
            break
        # The slice end is exclusive, so the next batch starts exactly at
        # `end` (starting at `end + 1` skipped one gene per batch).
        start = end

        batch = []
        for gene in genes:
            record = ['gene', gene.code]
            try:
                data = gene_manager.get(gene.code)
                values = {}
                arr_disease = []
                asso_disease = []
                asso_pub = []
                for field, value in data.items():
                    # Only the fields handled below are inspected.
                    if not (field in core_fields or field in disease_fields
                            or field in publication_fields):
                        continue
                    field_value = value['value']
                    if field_value is None:
                        continue

                    if field in core_fields:
                        values[field] = field_value
                    elif field == 'disgenet-diseases':
                        arr_disease.extend(field_value)
                        asso_disease.extend(
                            [item['disease'] for item in field_value])
                    elif field in sentence_keys:
                        sentence_key = sentence_keys[field]
                        arr_disease.extend([{
                            'disease': item.get('disease', ''),
                            'pubmedid': item.get('pmid', ''),
                            'sentence': item.get(sentence_key, ''),
                        } for item in field_value])
                        asso_disease.extend(
                            [item['disease'] for item in field_value])
                    elif field == 'publications':
                        values[field] = field_value
                        for pub in field_value:
                            try:
                                asso_pub.append({
                                    'pmid': pub['pmid'],
                                    'title': pub['title'],
                                })
                            except (KeyError, TypeError):
                                # Skip only the malformed entry instead of
                                # dropping every publication after it.
                                continue
                    else:  # 'gwas-publications'
                        asso_pub.extend(field_value)

                if arr_disease:
                    values['disgenet-diseases'] = arr_disease
                if asso_disease:
                    values['associated_diseases'] = asso_disease
                if asso_pub:
                    values['associated_publications'] = asso_pub

                if values:
                    record.append(values)
                batch.append(record)
            except Exception as e:
                # Best effort: one bad gene must not stop the export, but
                # the skip is made visible.
                print("[!] Skipping gene %s: %s" % (gene.code, e))

        # Publish rabbitMQ
        self.publish_to_queue(batch, GENOME_QUEUE, RABBITMQ_HOST,
                              RABBITMQ_PORT)

    print("[***] DONE gene")
    print("[x] Sent data to RabbitMQ")
def process(self, params={}):
    """Publish good-quality genes to the ``genome-browser-gene`` queue.

    Genes with ``is_good_quality=True`` are read in batches.  Each gene
    becomes a dict with ``version``, ``name`` and, as far as the stored data
    allows, ``core_attributes``, ``publications``, ``protein_product``,
    ``description``, the combined disease rows (``disgenet-diseases``), the
    disease names (``associated_diseases``) and
    ``associated_publications``.  If reading a gene's data fails part-way,
    the failure is reported and the dict is published with whatever was
    filled in up to that point.  Each non-empty batch is sent with
    ``self.publish_to_queue``.

    Args:
        params: Not read by this command.
    """
    # DECLARE VARIABLE
    GENOME_QUEUE = 'genome-browser-gene'
    RABBITMQ_HOST = settings.RABBITMQ_HOST
    RABBITMQ_PORT = int(settings.RABBITMQ_PORT)
    batch_size = 5000

    # (field, key holding the supporting sentence) for the sources whose
    # rows are normalised to disease / pubmedid / sentence.
    normalised_sources = (
        ('gwas-diseases', 'sentence'),
        ('ctdbase-diseases', 'evidence'),
    )

    # Starting
    print("[x] Publish data to rabbitmq")

    ##########################
    ## Gene
    manager = gene_components.DataManager()
    start = 0
    while True:
        end = start + batch_size
        # Explicit ordering keeps consecutive slices stable.
        genes = list(
            Gene.objects.filter(is_good_quality=True)
            .order_by('pk')[start:end])
        if not genes:
            # Past the last gene: nothing to publish for this slice.
            break
        # The slice end is exclusive, so the next batch starts exactly at
        # `end` (starting at `end + 1` skipped one gene per batch).
        start = end

        batch = []
        for gene in genes:
            doc = {'version': '0.1', 'name': gene.code}
            try:
                data = manager.get(gene.code)
                arr_disease = []
                asso_disease = []
                asso_pub = []

                synonyms = data['synonyms']['value']
                doc['core_attributes'] = {
                    'chromosome': data['chromosome']['value'],
                    'start': data['start']['value'],
                    'end': data['end']['value'],
                    'synonyms': synonyms if synonyms is not None else [],
                }

                for field in ('publications', 'protein_product',
                              'description'):
                    field_value = data[field]['value']
                    if field_value:
                        doc[field] = field_value

                # disease
                disgenet = data['disgenet-diseases']['value']
                if disgenet:
                    arr_disease.extend(disgenet)
                    asso_disease.extend(
                        [item['disease'] for item in disgenet])

                for source, sentence_key in normalised_sources:
                    # Each source reads its OWN rows; the ctdbase branch
                    # used to iterate the gwas rows by mistake.
                    rows = data[source]['value']
                    if rows:
                        for row in rows:
                            arr_disease.append({
                                'disease': row.get('disease', ''),
                                'pubmedid': row.get('pmid', ''),
                                'sentence': row.get(sentence_key, ''),
                            })
                        asso_disease.extend(
                            [row['disease'] for row in rows])

                if arr_disease:
                    doc['disgenet-diseases'] = arr_disease
                if asso_disease:
                    doc['associated_diseases'] = asso_disease

                # publication
                publications = data['publications']['value']
                if publications:
                    for pub in publications:
                        asso_pub.append({
                            'pmid': pub.get('pmid', ''),
                            'title': pub.get('title', ''),
                        })
                gwas_publications = data['gwas-publications']['value']
                if gwas_publications:
                    asso_pub.extend(gwas_publications)
                if asso_pub:
                    doc['associated_publications'] = asso_pub
            except Exception as e:
                # Best effort: the partially filled document is still
                # published, but the failure is made visible.
                print("[!] Incomplete data for gene %s: %s" % (gene.code, e))
            batch.append(doc)

        # Publish rabbitMQ
        self.publish_to_queue(batch, GENOME_QUEUE, RABBITMQ_HOST,
                              RABBITMQ_PORT)

    print("[***] DONE gene")
def process(self, params={}):
    """Print which variations and genes have the most populated fields.

    For every ``Variation`` and then every ``Gene``, the stored data is
    fetched from the matching ``DataManager``.  Each watched field whose
    ``value`` is neither ``''`` nor ``None`` is printed and counted against
    the record's code.  After each pass the ten codes with the highest
    counts are printed as ``(code, count)`` pairs, ties ordered by code.

    Args:
        params: Not read by this command.
    """
    variation_keys = frozenset([
        'synonyms', 'name', 'is_somatic', 'minor_allele_frequency',
        'evidence_attributes', 'ancestral_allele', 'minor_allele_count',
        'clinic_significance', 'minor_allele', 'effects', 'chromosome',
        'publications', 'genotype_frequency', 'hgvs', 'allele',
        'allele_frequency', 'associated_disease', 'attribute', 'var_type',
        'var_property', 'var_disease', 'reversed', 'gwas-effects',
        '1000-genomes', 'disgenet-diseases', 'genename', 'allele_string',
        'consequence_types', 'ensembl-id', 'vcf_U5', 'vcf_ASS', 'vcf_DSS',
        'vcf_INT', 'vcf_R3', 'vcf_R5', 'vcf_OTH', 'vcf_CFL', 'vcf_ASP',
        'vcf_MUT', 'vcf_VLD', 'vcf_G5A', 'vcf_G5', 'vcf_HD', 'vcf_GNO',
        'vcf_KGPhase1', 'vcf_KGPhase3', 'vcf_CDA', 'vcf_LSD', 'vcf_MTP',
        'vcf_OM', 'vcf_NOC', 'vcf_WTD', 'vcf_NOV', 'vcf_CAF', 'vcf_COMMON',
        'vcf_CLNHGVS', 'vcf_CLNALLE', 'vcf_CLNSRC', 'vcf_CLNORIGIN',
        'vcf_CLNSRCID', 'vcf_CLNSIG', 'vcf_CLNDSDB', 'vcf_CLNDSDBID',
        'vcf_CLNDBN', 'vcf_CLNREVSTAT', 'vcf_CLNACC', 'vcf_REF', 'vcf_ALT',
        'vcf_RS', 'vcf_RSPOS', 'vcf_RV', 'vcf_VP', 'vcf_GENEINFO',
        'vcf_dbSNPBuildID', 'vcf_SAO', 'vcf_SSR', 'vcf_WGT', 'vcf_VC',
        'vcf_PM', 'vcf_TPA', 'vcf_PMC', 'vcf_S3D', 'vcf_SLO', 'vcf_NSF',
        'vcf_NSM', 'vcf_NSN', 'vcf_SYN', 'vcf_U3',
    ])
    gene_keys = frozenset([
        'geneid', 'chromosome', 'start', 'end', 'num_exon',
        'protein_product', 'description', 'associated_disease', 'synonyms',
        'publications', 'havana_gene', 'biotype', 'is_reversed',
        'ctdbase-diseases', 'disgenet-diseases', 'id', 'name',
    ])

    def count_populated(records, manager, watched):
        """Return a Counter: record code -> number of populated fields."""
        counts = Counter()
        for record in records:
            try:
                vol = manager.get(record.code)
                if vol is None:
                    continue
                for key in vol.keys():
                    if key not in watched:
                        continue
                    value = vol[key].get('value', '')
                    if value not in ('', None):
                        print("[Value] %s" % (value,))
                        counts[record.code] += 1
            except Exception as e:
                # Best effort: keep scanning, but make the failure visible.
                print("[!] Could not inspect %s: %s" % (record.code, e))
        return counts

    def top_ten(counts):
        """Return the ten highest counts, ties broken by code."""
        # Sorting by code alone listed the alphabetically-first codes,
        # which is not a "top 10".
        return sorted(counts.items(),
                      key=lambda pair: (-pair[1], pair[0]))[:10]

    rsnumbers = count_populated(Variation.objects.all(),
                                variation_components.DataManager(),
                                variation_keys)
    print("[RSNUMBER] Top 10")
    print(top_ten(rsnumbers))

    genes = count_populated(Gene.objects.all(),
                            gene_components.DataManager(), gene_keys)
    print("[GENE] Top 10")
    print(top_ten(genes))