Example #1
0
 def handle(self, *args, **options):
     """Recompute and store hit counts and thresholds for GWAS studies.

     Options read from ``options``:
         maf: forwarded to ``get_hit_count``.
         update_all: when truthy, studies that already have a Bonferroni
             hit count are recomputed as well.
         study_id: restrict the run to this single study (always updated).
         perm_file: file passed to ``load_permutation_thresholds``; when
             given, permutation hits and thresholds are stored too.

     A failure on one study is reported and the remaining studies are
     still processed.

     Raises:
         CommandError: if anything outside the per-study handling fails.
     """
     maf = options.get('maf', None)
     update_all = options.get('update_all', None)
     study_id = options.get('study_id', None)
     perm_file = options.get('perm_file', None)
     try:
         if study_id:
             study_pks = [study_id]
         else:
             # Run through all studies with hdf5 files
             study_pks = Study.objects.all().values_list('id', flat=True)
         if perm_file:
             permutation_thresholds = load_permutation_thresholds(perm_file)
         else:
             permutation_thresholds = None
         counter = 0
         for study_pk in study_pks:
             try:
                 study = Study.objects.get(pk=study_pk)
                 # Studies that already have hit counts are skipped unless
                 # an update is forced (update_all) or explicitly requested
                 # for a single study (study_id).
                 if not (study.n_hits_bonf is None or update_all
                         or study_id):
                     continue
                 hdf5_file = os.path.join(settings.HDF5_FILE_PATH,
                                          'gwas_results',
                                          '%s.hdf5' % study.pk)
                 perm_threshold = None
                 if permutation_thresholds:
                     perm_threshold = permutation_thresholds[study.pk]
                 hits, thresholds = get_hit_count(
                     hdf5_file, maf=maf, perm_threshold=perm_threshold)
                 study.n_hits_bonf = hits['bonferroni_hits05']
                 study.n_hits_thr = hits['thr_e-4']
                 study.n_hits_fdr = hits['bh_hits']
                 study.bonferroni_threshold = thresholds[
                     'bonferroni_threshold05']
                 study.bh_threshold = thresholds['bh_threshold']
                 study.n_hits_total = thresholds['total_associations']
                 if perm_file:
                     study.n_hits_perm = hits['permutation_hits']
                     study.permutation_threshold = thresholds[
                         'permutation']
                 study.save()
                 self.stdout.write(
                     self.style.SUCCESS(
                         'Study %s successfully updated' % study))
                 counter += 1
             except FileNotFoundError:
                 self.stdout.write(
                     self.style.ERROR('HDF5 file for study %s not found' %
                                      study_pk))
             except Exception as err:
                 # Report the primary key rather than `study`: the latter
                 # is unbound (or stale from the previous iteration) when
                 # the database lookup itself failed.
                 self.stdout.write(
                     self.style.ERROR(
                         'Study %s could not be updated. Reason: %s' %
                         (study_pk, err)))
         self.stdout.write(
             str(counter) + ' studies updated in the database.')
     except Exception as err:
         raise CommandError('Error saving phenotypes. Reason: %s' %
                            str(err)) from err
Example #2
0
    def handle(self, *args, **options):
        """Index one or all studies through a group of ``index_study`` tasks.

        Options read from ``options``:
            study_id: restrict the run to this single study; otherwise all
                studies are indexed.
            perm_file: file passed to ``load_permutation_thresholds``. A
                study without an entry gets ``None`` as its threshold.

        The tasks are dispatched together and the command blocks until all
        results are available, then prints one status line per study.

        Raises:
            CommandError: if loading the studies, dispatching the tasks or
                collecting their results fails.
        """
        study_id = options.get('study_id', None)
        perm_file = options.get('perm_file', None)
        try:
            if study_id:
                studies = [Study.objects.get(pk=study_id)]
            else:
                studies = Study.objects.all()
            if perm_file:
                permutation_thresholds = load_permutation_thresholds(perm_file)
            else:
                permutation_thresholds = {}
            num_studies = len(studies)
            jobs = group(
                index_study.s(study.pk,
                              permutation_thresholds.get(study.pk, None))
                for study in studies)
            result = jobs.apply_async()
            # NOTE(review): presumably a Celery group result, in which case
            # a failing task re-raises here and is turned into the
            # CommandError below -- confirm.
            output = result.get()

            # Each result is ((indexed, failed), study id). The counter is
            # 1-based so the last study reports "N/N finished".
            for finished, ((indexed_assoc, failed_assoc),
                           indexed_study_id) in enumerate(output, start=1):
                if failed_assoc > 0:
                    self.stdout.write(
                        self.style.ERROR(
                            '%s/%s Following associations failed to index for "%s" in elasticsearch'
                            % (failed_assoc, indexed_assoc + failed_assoc,
                               indexed_study_id)))
                elif indexed_assoc == 0:
                    self.stdout.write(
                        self.style.WARNING(
                            'No associations found that match the threshold. Skipping "%s" in elasticsearch'
                            % indexed_study_id))
                else:
                    self.stdout.write(
                        self.style.SUCCESS(
                            'Successfully indexed all %s assocations for "%s" in elasticsearch. (%s/%s finished)'
                            % (indexed_assoc, indexed_study_id, finished,
                               num_studies)))
        except Exception as err:
            raise CommandError('Error indexing GWAS studies. Reason: %s' %
                               str(err)) from err
Example #3
0
 def handle(self, *args, **options):
     """Index the associations of one or all studies, one study at a time.

     Options read from ``options``:
         study_id: restrict the run to this single study; otherwise all
             studies are indexed.
         perm_file: file passed to ``load_permutation_thresholds``; the
             per-study threshold is forwarded to ``index_study``.

     A study whose HDF5 file is missing is reported and skipped. A study
     without an entry in the permutation file is indexed without a
     permutation threshold, after a warning.

     Raises:
         CommandError: on any other failure.
     """
     study_id = options.get('study_id', None)
     perm_file = options.get('perm_file', None)
     try:
         if study_id:
             studies = [Study.objects.get(pk=study_id)]
         else:
             studies = Study.objects.all()
         if perm_file:
             permutation_thresholds = load_permutation_thresholds(perm_file)
         else:
             permutation_thresholds = None
         for study in studies:
             try:
                 perm_threshold = None
                 if permutation_thresholds:
                     # A missing entry must not abort the whole batch with
                     # a bare KeyError; fall back to the call without a
                     # threshold and tell the operator.
                     perm_threshold = permutation_thresholds.get(study.pk)
                     if perm_threshold is None:
                         self.stdout.write(
                             self.style.WARNING(
                                 'No permutation threshold found for "%s". Indexing without it'
                                 % study))
                 if perm_threshold is not None:
                     indexed_assoc, failed_assoc = index_study(
                         study.pk, perm_threshold)
                 else:
                     indexed_assoc, failed_assoc = index_study(study.pk)
                 if failed_assoc > 0:
                     self.stdout.write(
                         self.style.ERROR(
                             '%s/%s SNPs failed to index for "%s" in elasticsearch'
                             % (failed_assoc, indexed_assoc + failed_assoc,
                                study)))
                 elif indexed_assoc == 0:
                     self.stdout.write(
                         self.style.WARNING(
                             'No associations found that match the threshold. Skipping "%s" in elasticsearch'
                             % (str(study))))
                 else:
                     self.stdout.write(
                         self.style.SUCCESS(
                             'Successfully indexed all %s assocations for "%s" in elasticsearch'
                             % (indexed_assoc, study)))
             except FileNotFoundError:
                 self.stdout.write(
                     self.style.ERROR('HDF5 file for study %s not found' %
                                      study))
     except Exception as err:
         raise CommandError('Error indexing GWAS studies. Reason: %s' %
                            str(err)) from err
Example #4
0
    def handle(self, *args, **options):
        """Create or update one study per ``<phenotype_id>.hdf5`` file.

        Options read from ``options``:
            hdf5_folder: folder scanned for ``.hdf5`` files; the part of the
                file name before the first dot is used as both phenotype
                and study primary key.
            maf: forwarded to ``get_hit_count``.
            permutation_file: optional file passed to
                ``load_permutation_thresholds``.
            permutation: optional default permutation threshold for files
                without their own entry. Values <= 1 are converted to
                -log10 scale; larger values are used unchanged.
            genotype_id, method, transformation: required; stored on every
                study.

        A failure on one file is reported and the remaining files are still
        processed.

        Raises:
            CommandError: if a required option is missing or anything
                outside the per-file handling fails.
        """
        hdf5_folder = options['hdf5_folder']
        maf = options['maf']
        permutation_file = options.get('permutation_file', None)
        default_perm_threshold = options.get('permutation', None)
        if default_perm_threshold and default_perm_threshold <= 1:
            default_perm_threshold = -math.log(default_perm_threshold, 10)
        genotype_id = options.get('genotype_id', None)
        method = options.get('method', None)
        transformation = options.get('transformation', None)
        try:
            # checks
            if None in [genotype_id, method, transformation]:
                raise ValueError("You must provide genotype_id, method and transformation")
            genotype = Genotype.objects.get(pk=genotype_id)
            counter = 0
            if permutation_file:
                permutation_thresholds = load_permutation_thresholds(permutation_file)
            else:
                permutation_thresholds = {}
            entries = sorted(os.listdir(os.fsencode(hdf5_folder)))
            # Only HDF5 files count towards the progress total.
            filenames = [name for name in map(os.fsdecode, entries)
                         if name.endswith(".hdf5")]
            num_files = len(filenames)
            for filename in filenames:
                try:
                    phenotype_id = filename.split(".")[0]
                    phenotype = Phenotype.objects.get(pk=phenotype_id)

                    # NOTE(review): phenotype_id is a str here -- confirm the
                    # permutation thresholds are keyed by str, not int.
                    perm_threshold = permutation_thresholds.get(phenotype_id, default_perm_threshold)
                    hits, thresholds = get_hit_count(os.path.join(hdf5_folder, filename), maf=maf, perm_threshold=perm_threshold)
                    # Bound the wait and fail on HTTP errors so a broken
                    # response is reported for this file instead of hanging
                    # or failing later with an unrelated error.
                    r = requests.get('https://arapheno.1001genomes.org/rest/phenotype/%s/values.json' % phenotype_id, timeout=60)
                    r.raise_for_status()
                    accessions = r.json()
                    countries = {acc['accession_country'] for acc in accessions}
                    study_name = '%s_%s_%s_%s' % (phenotype.name.replace(" ", "_"), transformation, genotype.name, method)
                    try:
                        study = Study.objects.get(pk=phenotype_id)
                    except Study.DoesNotExist:
                        study = Study(pk=phenotype_id)
                    study.name = study_name
                    study.transformation = transformation
                    study.genotype = genotype
                    study.phenotype = phenotype
                    study.method = method
                    study.n_hits_bonf = hits['bonferroni_hits05']
                    study.n_hits_thr = hits['thr_e-4']
                    study.n_hits_fdr = hits['bh_hits']
                    study.bonferroni_threshold = thresholds['bonferroni_threshold05']
                    study.bh_threshold = thresholds['bh_threshold']
                    study.n_hits_total = thresholds['total_associations']
                    study.number_samples = len(accessions)
                    study.number_countries = len(countries)
                    study.n_hits_perm = hits.get('permutation_hits', None)
                    study.permutation_threshold = thresholds.get('permutation', None)
                    study.save()
                    counter += 1
                    self.stdout.write(self.style.SUCCESS('Study %s(%s) updated (%s/%s finished)' % (study.name, study.pk, counter, num_files)))
                except Exception as err:
                    self.stdout.write(self.style.ERROR('Impossible to update study from file %s. Reason: %s' % (filename, str(err))))
        except Exception as err:
            raise CommandError(
                'Error updating studies. Reason: %s' % str(err)) from err

# # Publication dict for available studies...
# publication_links_dict = {
#                 'Atwell et. al, Nature 2010': 'https://doi.org/10.1038/nature08800',
#                 'Flowering time in simulated seasons': 'https://doi.org/10.1073/pnas.1007431107',
#                 'Mejion': 'https://doi.org/10.1038/ng.2824',
#                 'DAAR': 'https://doi.org/10.1073/pnas.1503272112',
#                 'Ion Concentration':'https://doi.org/10.1371/journal.pbio.1002009',
#                 '1001genomes flowering time phenotypes': 'https://doi.org/10.1016/j.cell.2016.05.063'}
# publication_PMID = {
#     'Atwell et. al, Nature 2010': '20336072',
#     'Flowering time in simulated seasons': '21078970',
#     'Mejion': '24212884',
#     'DAAR': '26324904',
#     'Ion Concentration': '25464340',
#     '1001genomes flowering time phenotypes': '27293186'
# }
# publication_PMCID = {
#     'Atwell et. al, Nature 2010': 'PMC3023908',
#     'Flowering time in simulated seasons': 'PMC3000268',
#     'Mejion': '',
#     'DAAR': 'PMC4577208',
#     'Ion Concentration': 'PMC4251824',
#     '1001genomes flowering time phenotypes': 'PMC4949382'
# }