def handle(self, *args, **options):
    """Recompute and store the hit counts and thresholds of GWAS studies.

    Options read from ``options``:
        maf: forwarded to ``get_hit_count`` as its ``maf`` argument.
        update_all: when truthy, every study is recomputed instead of only
            those whose ``n_hits_bonf`` is still unset.
        study_id: restrict the run to this single study (always recomputed).
        perm_file: passed to ``load_permutation_thresholds``; when given,
            the permutation hits and threshold are stored as well.

    A failure while processing one study is reported on ``self.stdout`` and
    does not stop the remaining studies.

    Raises:
        CommandError: if anything outside the per-study processing fails.
    """
    maf = options.get('maf', None)
    update_all = options.get('update_all', None)
    study_id = options.get('study_id', None)
    perm_file = options.get('perm_file', None)
    try:
        if study_id:
            study_pks = [study_id]
        else:
            # Run through all studies with hdf5 files
            study_pks = Study.objects.all().values_list('id', flat=True)
        if perm_file:
            permutation_thresholds = load_permutation_thresholds(perm_file)
        else:
            permutation_thresholds = None

        counter = 0
        for study_pk in study_pks:
            try:
                study = Study.objects.get(pk=study_pk)
                # Only studies never processed before are handled, unless an
                # update of everything or of one explicit study was requested.
                if not (study.n_hits_bonf is None or update_all or study_id):
                    continue
                hdf5_file = os.path.join(
                    settings.HDF5_FILE_PATH, 'gwas_results',
                    '%s.hdf5' % study.pk)
                perm_threshold = None
                if permutation_thresholds:
                    perm_threshold = permutation_thresholds[study.pk]
                hits, thresholds = get_hit_count(
                    hdf5_file, maf=maf, perm_threshold=perm_threshold)
                study.n_hits_bonf = hits['bonferroni_hits05']
                study.n_hits_thr = hits['thr_e-4']
                study.n_hits_fdr = hits['bh_hits']
                study.bonferroni_threshold = thresholds[
                    'bonferroni_threshold05']
                study.bh_threshold = thresholds['bh_threshold']
                study.n_hits_total = thresholds['total_associations']
                if perm_file:
                    study.n_hits_perm = hits['permutation_hits']
                    study.permutation_threshold = thresholds['permutation']
                study.save()
                self.stdout.write(
                    self.style.SUCCESS(
                        'Study %s successfully updated' % study))
                counter += 1
            except Exception as err:
                # Report by primary key: ``study`` is unbound (or left over
                # from the previous iteration) when the lookup itself failed.
                # The real reason is included because a missing HDF5 file is
                # only one of the things that can go wrong here.
                self.stdout.write(
                    self.style.ERROR(
                        'Could not update study %s. Reason: %s: %s'
                        % (study_pk, type(err).__name__, err)))
        self.stdout.write('%s studies updated in the database.' % counter)
    except Exception as err:
        raise CommandError(
            'Error saving phenotypes. Reason: %s' % str(err)) from err
def handle(self, *args, **options):
    """Index GWAS studies by dispatching one ``index_study`` job per study.

    Options read from ``options``:
        study_id: restrict the run to this single study.
        perm_file: passed to ``load_permutation_thresholds``; the threshold
            found for a study (or ``None``) is handed to its job.

    All jobs are submitted as one group and the command waits for every
    result before reporting. Each result is expected to unpack as
    ``((indexed, failed), study_id)``.

    Raises:
        CommandError: if loading the studies, dispatching the jobs or
            collecting their results fails.
    """
    study_id = options.get('study_id', None)
    perm_file = options.get('perm_file', None)
    try:
        if study_id:
            studies = [Study.objects.get(pk=study_id)]
        else:
            studies = Study.objects.all()
        if perm_file:
            permutation_thresholds = load_permutation_thresholds(perm_file)
        else:
            permutation_thresholds = {}
        num_studies = len(studies)

        jobs = group(
            index_study.s(study.pk, permutation_thresholds.get(study.pk, None))
            for study in studies)
        output = jobs.apply_async().get()

        counter = 0
        # A distinct loop name keeps the ``study_id`` option from being
        # rebound while the results are reported.
        for (indexed_assoc, failed_assoc), indexed_study_id in output:
            if failed_assoc > 0:
                self.stdout.write(
                    self.style.ERROR(
                        '%s/%s Following associations failed to index for "%s" in elasticsearch'
                        % (failed_assoc, indexed_assoc + failed_assoc,
                           indexed_study_id)))
            elif indexed_assoc == 0:
                self.stdout.write(
                    self.style.WARNING(
                        'No associations found that match the threshold. Skipping "%s" in elasticsearch'
                        % indexed_study_id))
            else:
                # Count the study before reporting so the first success
                # reads "1/N" rather than "0/N".
                counter += 1
                self.stdout.write(
                    self.style.SUCCESS(
                        'Successfully indexed all %s assocations for "%s" in elasticsearch. (%s/%s finished)'
                        % (indexed_assoc, indexed_study_id, counter,
                           num_studies)))
    except Exception as err:
        raise CommandError(
            'Error indexing GWAS studies. Reason: %s' % str(err)) from err
def handle(self, *args, **options):
    """Run ``index_study`` for GWAS studies one after another and report.

    Options read from ``options``:
        study_id: restrict the run to this single study.
        perm_file: passed to ``load_permutation_thresholds``; a study's
            threshold, when present, is given to ``index_study``.

    A ``FileNotFoundError`` raised for one study is reported on
    ``self.stdout`` and the loop moves on to the next study.

    Raises:
        CommandError: on any other failure.
    """
    study_id = options.get('study_id', None)
    perm_file = options.get('perm_file', None)
    try:
        if study_id:
            studies = [Study.objects.get(pk=study_id)]
        else:
            studies = Study.objects.all()
        if perm_file:
            permutation_thresholds = load_permutation_thresholds(perm_file) or {}
        else:
            permutation_thresholds = {}

        for study in studies:
            try:
                # A study missing from the permutation file is indexed
                # without a permutation threshold instead of aborting the
                # whole run with a KeyError.
                perm_threshold = permutation_thresholds.get(study.pk)
                if perm_threshold is not None:
                    indexed_assoc, failed_assoc = index_study(
                        study.pk, perm_threshold)
                else:
                    indexed_assoc, failed_assoc = index_study(study.pk)

                if failed_assoc > 0:
                    self.stdout.write(
                        self.style.ERROR(
                            '%s/%s SNPs failed to index for "%s" in elasticsearch'
                            % (failed_assoc, indexed_assoc + failed_assoc,
                               study)))
                elif indexed_assoc == 0:
                    self.stdout.write(
                        self.style.WARNING(
                            'No associations found that match the threshold. Skipping "%s" in elasticsearch'
                            % (str(study))))
                else:
                    self.stdout.write(
                        self.style.SUCCESS(
                            'Successfully indexed all %s assocations for "%s" in elasticsearch'
                            % (indexed_assoc, study)))
            except FileNotFoundError:
                self.stdout.write(
                    self.style.ERROR(
                        'HDF5 file for study %s not found' % study))
    except Exception as err:
        raise CommandError(
            'Error indexing GWAS studies. Reason: %s' % str(err)) from err
def handle(self, *args, **options):
    """Create or update one ``Study`` per ``<phenotype_id>.hdf5`` file.

    Options read from ``options``:
        hdf5_folder: directory that is scanned for ``*.hdf5`` files; the
            part of each filename before the first dot is used as the
            phenotype id and as the study primary key.
        maf: forwarded to ``get_hit_count`` as its ``maf`` argument.
        permutation_file: passed to ``load_permutation_thresholds``.
        permutation: fallback permutation threshold. Values greater than 1
            are used as given; other non-zero values are converted with
            ``-log10``.
        genotype_id, method, transformation: required; stored on every
            study.

    For each file the hit counts come from ``get_hit_count`` and the sample
    and country counts from the AraPheno REST endpoint for that phenotype.
    A failure for one file is reported on ``self.stdout`` and the remaining
    files are still processed.

    Raises:
        CommandError: if a required option is missing or anything outside
            the per-file processing fails.
    """
    hdf5_folder = options['hdf5_folder']
    maf = options['maf']
    permutation_file = options.get('permutation_file', None)
    default_perm_threshold = options.get('permutation', None)
    if default_perm_threshold and default_perm_threshold <= 1:
        default_perm_threshold = -math.log(default_perm_threshold, 10)
    genotype_id = options.get('genotype_id', None)
    method = options.get('method', None)
    transformation = options.get('transformation', None)
    # Bound the wait for the remote API so an unresponsive server cannot
    # hang the command indefinitely.
    request_timeout = 60
    try:
        # checks
        if None in [genotype_id, method, transformation]:
            raise ValueError(
                "You must provide genotype_id, method and transformation")
        genotype = Genotype.objects.get(pk=genotype_id)
        if permutation_file:
            permutation_thresholds = load_permutation_thresholds(
                permutation_file)
        else:
            permutation_thresholds = {}

        # Keep only the HDF5 files so the "(x/y finished)" total is not
        # inflated by unrelated directory entries.
        filenames = sorted(
            name for name in os.listdir(hdf5_folder)
            if name.endswith(".hdf5"))
        num_files = len(filenames)
        counter = 0
        for filename in filenames:
            try:
                phenotype_id = filename.split(".")[0]
                phenotype = Phenotype.objects.get(pk=phenotype_id)
                # NOTE(review): the lookup key is the filename stem (a str);
                # confirm load_permutation_thresholds keys its result the
                # same way, otherwise the default is always used.
                perm_threshold = permutation_thresholds.get(
                    phenotype_id, default_perm_threshold)
                hits, thresholds = get_hit_count(
                    os.path.join(hdf5_folder, filename),
                    maf=maf, perm_threshold=perm_threshold)

                response = requests.get(
                    'https://arapheno.1001genomes.org/rest/phenotype/%s/values.json'
                    % phenotype_id,
                    timeout=request_timeout)
                # Fail with a clear HTTP error rather than a confusing
                # KeyError/TypeError on an error payload further down.
                response.raise_for_status()
                accessions = response.json()
                countries = {acc['accession_country'] for acc in accessions}

                # Must be a plain string: a trailing comma here would turn
                # the name into a one-element tuple.
                study_name = '%s_%s_%s_%s' % (
                    phenotype.name.replace(" ", "_"), transformation,
                    genotype.name, method)
                try:
                    study = Study.objects.get(pk=phenotype_id)
                except Study.DoesNotExist:
                    study = Study(pk=phenotype_id)
                study.name = study_name
                study.transformation = transformation
                study.genotype = genotype
                study.phenotype = phenotype
                study.method = method
                study.n_hits_bonf = hits['bonferroni_hits05']
                study.n_hits_thr = hits['thr_e-4']
                study.n_hits_fdr = hits['bh_hits']
                study.bonferroni_threshold = thresholds[
                    'bonferroni_threshold05']
                study.bh_threshold = thresholds['bh_threshold']
                study.n_hits_total = thresholds['total_associations']
                study.number_samples = len(accessions)
                study.number_countries = len(countries)
                study.n_hits_perm = hits.get('permutation_hits', None)
                study.permutation_threshold = thresholds.get(
                    'permutation', None)
                study.save()
                counter += 1
                self.stdout.write(
                    self.style.SUCCESS(
                        'Study %s(%s) updated (%s/%s finished)'
                        % (study.name, study.pk, counter, num_files)))
            except Exception as err:
                self.stdout.write(
                    self.style.ERROR(
                        'Impossible to update study from file %s. Reason: %s'
                        % (filename, str(err))))
    except Exception as err:
        raise CommandError(
            'Error updating studies. Reason: %s' % str(err)) from err

# # Publication dict for available studies...
# publication_links_dict = {
#     'Atwell et. al, Nature 2010': 'https://doi.org/10.1038/nature08800',
#     'Flowering time in simulated seasons': 'https://doi.org/10.1073/pnas.1007431107',
#     'Mejion': 'https://doi.org/10.1038/ng.2824',
#     'DAAR': 'https://doi.org/10.1073/pnas.1503272112',
#     'Ion Concentration':'https://doi.org/10.1371/journal.pbio.1002009',
#     '1001genomes flowering time phenotypes': 'https://doi.org/10.1016/j.cell.2016.05.063'}
# publication_PMID = {
#     'Atwell et. al, Nature 2010': '20336072',
#     'Flowering time in simulated seasons': '21078970',
#     'Mejion': '24212884',
#     'DAAR': '26324904',
#     'Ion Concentration': '25464340',
#     '1001genomes flowering time phenotypes': '27293186'
# }
# publication_PMCID = {
#     'Atwell et. al, Nature 2010': 'PMC3023908',
#     'Flowering time in simulated seasons': 'PMC3000268',
#     'Mejion': '',
#     'DAAR': 'PMC4577208',
#     'Ion Concentration': 'PMC4251824',
#     '1001genomes flowering time phenotypes': 'PMC4949382'
# }