def getSignigicantPhenotypes(type, value):
    """Return a dataframe of phenotype names (column 'nom') linked to *value*.

    type  -- "rsID" to look up by marker name, "gene" to look up by gene
             symbol; any other value is treated as a phenotype name and is
             returned as-is in a one-row dataframe.
    value -- the marker / gene / phenotype name to search for.

    NOTE(review): unlike the other getSignigicantPhenotypes variant in this
    file, this one applies no p-value filter and no ordering.
    """
    # NOTE(review): `type` shadows the builtin; name kept for caller compatibility.
    UserLogs.add('Victor Dupuy', '255.255.255.255', 'accessed the module : Tools - getPhenotypes', 'MySQL Database', ['assoc', 'marqueurs', 'phenotypes', 'experiment'])
    # SECURITY: `value` is concatenated straight into the SQL string (injection
    # risk). Left as-is because connect.fetchData only accepts a raw query
    # string here — should be migrated to bound parameters.
    if type == "rsID":
        phenotypes = connect.fetchData(
            "select p.nom From marqueurs m join assoc a on a.rs_id_assoc = m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where m.nom in ('" + value + "')")
    elif type == "gene":
        # A marker can be annotated with the gene it sits in, or the nearest
        # gene before/after it — match on any of the three columns.
        phenotypes = connect.fetchData(
            "select DISTINCT p.nom From marqueurs m join assoc a on a.rs_id_assoc = m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where (m.gene in ('" + value + "') or m.gene_before in ('" + value + "') or m.gene_after in ('" + value + "') ) ")
    else:
        # Not a marker or gene: assume `value` is itself a phenotype name.
        phenotypes = pandas.DataFrame(data={'nom': value}, index=[0])
    return phenotypes
def AutoCompletePhenotypes(request, text):
    """Return binomial phenotype names matching *text*, JSON-encoded.

    Three LIKE queries are run (prefix, suffix, substring) and their
    results are outer-merged on the 'nom' column.
    """
    # SECURITY NOTE(review): `text` is interpolated into the SQL string
    # (injection risk) — behavior preserved; fetchData takes raw SQL only.
    prefix_query = "select nom from phenotypes where nom like '" + text + "%' and type='binomial'"
    suffix_query = "select nom from phenotypes where nom like '%" + text + "' and type='binomial'"
    substring_query = "select nom from phenotypes where nom like '%" + text + "%' and type='binomial'"
    matches = [connect.fetchData(q) for q in (prefix_query, suffix_query, substring_query)]
    # Outer merge keeps every distinct name found by any of the three queries.
    merged = pandas.merge(matches[0], matches[1], on="nom", how="outer")
    merged = pandas.merge(merged, matches[2], on="nom", how="outer")
    # UserLogs.add(
    #     'Victor Dupuy',
    #     '255.255.255.255',
    #     'accessed the module : AutoComplete',
    #     'MySQL Database',
    #     ['phenotypes']
    # )
    return HttpResponse(merged.to_json(orient="records"))
def AutoCompletePhenotypes(request, text):
    """Autocomplete endpoint: binomial phenotypes whose name starts with,
    ends with, or contains *text*. Responds with a JSON array of records."""
    # SECURITY NOTE(review): `text` goes into the SQL unescaped (injection
    # risk in the original too) — kept for behavioral parity.
    like_patterns = (text + "%", "%" + text, "%" + text + "%")
    combined = None
    for pattern in like_patterns:
        hits = connect.fetchData("select nom from phenotypes where nom like '" + pattern + "' and type='binomial'")
        # Accumulate via outer merge so no name found by any pattern is lost.
        combined = hits if combined is None else pandas.merge(combined, hits, on="nom", how="outer")
    # UserLogs.add(
    #     'Victor Dupuy',
    #     '255.255.255.255',
    #     'accessed the module : AutoComplete',
    #     'MySQL Database',
    #     ['phenotypes']
    # )
    return HttpResponse(combined.to_json(orient='records'))
def autoCompleteGenes(request, text):
    """Autocomplete endpoint: gene symbols that start with, end with, or
    contain *text*. Responds with a JSON array of records."""
    # SECURITY NOTE(review): `text` is concatenated into the SQL
    # (injection risk) — preserved; fetchData only takes a raw string.
    like_patterns = (text + "%", "%" + text, "%" + text + "%")
    combined = None
    for pattern in like_patterns:
        hits = connect.fetchData("select symbol from genes where symbol like '" + pattern + "'")
        # Outer-merge so every distinct symbol from any pattern survives.
        combined = hits if combined is None else pandas.merge(combined, hits, on="symbol", how="outer")
    # UserLogs.add(
    #     'Victor Dupuy',
    #     '255.255.255.255',
    #     'accessed the module : AutoComplete',
    #     'MySQL Database',
    #     ['genes']
    # )
    return HttpResponse(combined.to_json(orient='records'))
def getManhattanData(type, value): # We'll need to get which phenotypes are selected. significantPhenotypes = getPhenotypes.getSignigicantPhenotypes(type, value) if significantPhenotypes.any()['nom']: # Check if there are significant phenotypes phenotype = getPhenotypes.getSignigicantPhenotypes(type, value).iat[0,0] #This gets the name of the significant phenotype # phenotype = "All cause death" # First we define the query sqlQuery="select distinct a.rs_id_assoc, a.chromosome,a.pos,a.info_assoc,a.pvalue_assoc,a.allele_A,a.allele_B,a.cohort_AA,a.cohort_BB,a.beta_assoc,a.maf, a.all_OR,xp.covariates,m.gene,m.gene_before,m.gene_after from assoc a join experiment xp on a.experiment=xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes join marqueurs m on a.rs_id_assoc=m.nom where p.nom='"+phenotype+"' and a.pvalue_assoc<=0.001" # Then we fetch the data and store it in a dataframe sorted_data = connect.fetchData(sqlQuery) UserLogs.add( 'Victor Dupuy', '255.255.255.255', 'accessed the module : Interactive Manhattan', 'MySQL Database', ['assoc', 'phenotypes', 'marqueurs'] ) sorted_data.drop_duplicates(subset='rs_id_assoc', inplace=True,keep='last') sorted_data["log10"] = -numpy.log10(sorted_data.pvalue_assoc) #ADD COLUMN LOG10 sorted_data = sorted_data.sort(['chromosome', 'pos']) sorted_data['even']=numpy.where(sorted_data['chromosome'] %2==0,sorted_data['log10'] , 'NaN') sorted_data["odd"]=numpy.where(sorted_data['chromosome'] %2!=0,sorted_data['log10'] , 'NaN') col=['rs_id_assoc', 'chromosome', 'pos', 'pvalue_assoc', 'allele_A', 'allele_B', 'covariates', 'cohort_BB', 'cohort_AA', 'beta_assoc', 'maf'] return sorted_data, phenotype else: print "There is no phenotype !" return "No Phenotype !"
def getManhattanData(type, value): # We'll need to get which phenotypes are selected. significantPhenotypes = getPhenotypes.getSignigicantPhenotypes(type, value) if significantPhenotypes.any()['nom']: # Check if there are significant phenotypes phenotype = getPhenotypes.getSignigicantPhenotypes(type, value).iat[0,0] #This gets the name of the significant phenotype # phenotype = "All cause death" # First we define the query sqlQuery="select distinct a.rs_id_assoc, a.chromosome,a.pos,a.info_assoc,a.pvalue_assoc,a.allele_A,a.allele_B,a.cohort_AA,a.cohort_BB,a.beta_assoc,a.maf, a.all_OR,xp.covariates,m.gene,m.gene_before,m.gene_after from assoc a join experiment xp on a.experiment=xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes join marqueurs m on a.rs_id_assoc=m.nom where p.nom='"+phenotype+"' and a.pvalue_assoc<=0.001 limit 1000" # Then we fetch the data and store it in a dataframe sorted_data = connect.fetchData(sqlQuery) UserLogs.add( 'Victor Dupuy', '255.255.255.255', 'accessed the module : Interactive Manhattan', 'MySQL Database', ['assoc', 'phenotypes', 'marqueurs'] ) sorted_data.drop_duplicates(subset='rs_id_assoc', inplace=True,keep='last') sorted_data["log10"] = -numpy.log10(sorted_data.pvalue_assoc) #ADD COLUMN LOG10 sorted_data = sorted_data.sort(['chromosome', 'pos']) sorted_data['even']=numpy.where(sorted_data['chromosome'] %2==0,sorted_data['log10'] , 'NaN') sorted_data["odd"]=numpy.where(sorted_data['chromosome'] %2!=0,sorted_data['log10'] , 'NaN') col=['rs_id_assoc', 'chromosome', 'pos', 'pvalue_assoc', 'allele_A', 'allele_B', 'covariates', 'cohort_BB', 'cohort_AA', 'beta_assoc', 'maf'] return sorted_data, phenotype else: print "There is no phenotype !" return "No Phenotype !"
def getSignigicantPhenotypes(type, value):
    """Return phenotypes significantly associated (p < 0.001) with *value*,
    ordered by ascending p-value.

    type  -- "rsID" for a marker lookup, "gene" for a gene-symbol lookup;
             anything else treats *value* as a phenotype name and returns
             it directly in a one-row dataframe (column 'nom').
    """
    # NOTE(review): `type` shadows the builtin; kept for caller compatibility.
    UserLogs.add(
        'Victor Dupuy',
        '255.255.255.255',
        'accessed the module : Tools - getPhenotypes',
        'MySQL Database',
        ['assoc', 'marqueurs', 'phenotypes' , 'experiment']
    )
    significance_cutoff = "0.001"
    # SECURITY NOTE(review): `value` is concatenated into the SQL string
    # (injection risk) — preserved; fetchData takes raw SQL only.
    if type == "rsID":
        query = "select p.nom From marqueurs m join assoc a on a.rs_id_assoc = m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where pvalue_assoc <"+significance_cutoff+" and m.nom in ('"+value+"') order by a.pvalue_assoc ASC"
        return connect.fetchData(query)
    if type == "gene":
        # Match the gene itself or the nearest gene before/after the marker.
        query = "select DISTINCT p.nom From marqueurs m join assoc a on a.rs_id_assoc = m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where pvalue_assoc <"+significance_cutoff+" and (m.gene in ('"+value+"') or m.gene_before in ('"+value+"') or m.gene_after in ('"+value+"') ) order by a.pvalue_assoc ASC "
        return connect.fetchData(query)
    # Fallback: *value* is itself a phenotype name.
    return pandas.DataFrame(data={'nom': value}, index=[0])
def autoCompleteGenes(request, text):
    """Return gene symbols matching *text*, JSON-encoded.

    Runs prefix, suffix and substring LIKE queries and outer-merges the
    three result frames on the 'symbol' column.
    """
    # SECURITY NOTE(review): `text` is interpolated into the SQL
    # (injection risk) — behavior preserved.
    prefix_query = "select symbol from genes where symbol like '" + text + "%'"
    suffix_query = "select symbol from genes where symbol like '%" + text + "'"
    substring_query = "select symbol from genes where symbol like '%" + text + "%'"
    frames = [connect.fetchData(q) for q in (prefix_query, suffix_query, substring_query)]
    # Merge pairwise; outer join keeps every distinct symbol found.
    merged = pandas.merge(frames[0], frames[1], on="symbol", how="outer")
    merged = pandas.merge(merged, frames[2], on="symbol", how="outer")
    # UserLogs.add(
    #     'Victor Dupuy',
    #     '255.255.255.255',
    #     'accessed the module : AutoComplete',
    #     'MySQL Database',
    #     ['genes']
    # )
    return HttpResponse(merged.to_json(orient="records"))
def table(request, type, value):
    """View: return the association table for the phenotype resolved from
    (type, value) as a JSON HttpResponse.

    The response is {'noResult', 'phenotypes', 'data'}; 'phenotypes' and
    'data' are themselves JSON strings (double-encoded — callers must
    parse them a second time).

    SECURITY NOTE(review): phenotype_selected is concatenated into the SQL
    query (injection risk); fetchData only accepts a raw query string.
    """
    print request, type, value
    # NOTE(review): stores the fetched dataframe in a module-level global —
    # shared across requests; presumably read by another view. Confirm.
    global data
    UserLogs.add(
        'Victor Dupuy',
        '255.255.255.255',
        'accessed the module : Web',
        'MySQL Database',
        ['assoc', 'phenotypes', 'marqueurs']
    )
    # We'll need to get which phenotypes are selected.
    significantPhenotypes = getPhenotypes.getSignigicantPhenotypes(type, value)
    if significantPhenotypes.any()['nom']: # Check if there are significant phenotypes
        # NOTE(review): second identical call — the frame above could be reused.
        phenotype_selected = getPhenotypes.getSignigicantPhenotypes(type, value).iat[0,0] #This gets the name of the significant phenotype
        print significantPhenotypes
        # First we define the query. The CASE expressions derive, per row:
        # risk_allele (which allele raises risk given the beta sign and the
        # phenotype's Risk_on_rise flag), risk_af (that allele's frequency
        # from the genotype cohort counts) and risk_allele_beta (beta
        # re-signed so it is always relative to the risk allele).
        sqlQuery = "select distinct a.rs_id_assoc,a.chromosome,a.pos,a.pvalue_assoc,m.gene_before,m.gene,m.gene_after,a.experiment,a.info_assoc,a.allele_A,a.allele_B,a.cohort_AA,a.cohort_BB,a.beta_assoc,a.maf,a.all_OR,xp.covariates, p.nom, case when a.beta_assoc>0 and p.Risk_on_rise is true then a.allele_B when a.beta_assoc<0 and p.Risk_on_rise is true then a.allele_A when a.beta_assoc>0 and p.Risk_on_rise is false then a.allele_A when a.beta_assoc<0 and p.Risk_on_rise is false then a.allele_B end as risk_allele, case when a.beta_assoc>0 and p.Risk_on_rise is true then ( (2*a.cohort_BB) + a.cohort_AB ) / ((2*a.cohort_AA) + (2*a.cohort_AB) +(2*a.cohort_BB) ) when a.beta_assoc<0 and p.Risk_on_rise is true then ( (2*a.cohort_AA) + a.cohort_AB ) / ((2*a.cohort_AA) + (2*a.cohort_AB) +(2*a.cohort_BB) ) when a.beta_assoc>0 and p.Risk_on_rise is false then ( (2*a.cohort_AA) + a.cohort_AB ) / ((2*a.cohort_AA) + (2*a.cohort_AB) +(2*a.cohort_BB) ) when a.beta_assoc<0 and p.Risk_on_rise is false then ( (2*a.cohort_BB) + a.cohort_AB ) / ((2*a.cohort_AA) + (2*a.cohort_AB) +(2*a.cohort_BB) ) end as risk_af, case when a.beta_assoc>0 and p.Risk_on_rise is true then a.beta_assoc when a.beta_assoc<0 and p.Risk_on_rise is true then a.beta_assoc*(-1) when a.beta_assoc>0 and p.Risk_on_rise is false then a.beta_assoc*(-1) when a.beta_assoc<0 and p.Risk_on_rise is false then a.beta_assoc end as risk_allele_beta from assoc a join experiment xp on a.experiment=xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes join marqueurs m on a.rs_id_assoc=m.nom where p.nom='"+phenotype_selected+"' and a.pvalue_assoc<0.001"
        # Then we fetch the data and store it in a dataframe
        dataframe = connect.fetchData(sqlQuery)
        # dataframe.rename(columns={'rs_id_assoc':'rs id assoc'}) #Self-explanatory
        # Publish the raw frame through the module-level global (see note above).
        data = dataframe
        dataframe.rename(columns = {
            'rs_id_assoc': 'rs_ID', 'pos': 'Pos', 'chromosome': 'Chr', 'gene_before': 'GeneBefore', 'pvalue_assoc': 'P-value', 'allele_A': 'Allele A', 'allele_B': 'Allele B', 'cohort_AA' : 'cohort AA', 'cohort_BB' : 'cohort BB', 'beta_assoc' : 'Beta Assoc', 'covariates' : 'Covariates', 'risk_allele' : 'Risk Allele', 'risk_af' : 'Risk Af', 'risk_allele_beta' : 'Risk Allele Beta', 'gene': 'Gene', 'gene_after': 'GeneAfter', 'nom' : 'Phenotype'
        }, inplace=True) #rename column to make them look nicer
        jsonData = dataframe.to_json(orient='records')
        significantPhenotypes.rename(columns={'nom' : 'Phenotypes'}, inplace=True);
        # Then we pass the dataframe to the client with json format
        response = json.dumps({
            'noResult' : False,
            'phenotypes': significantPhenotypes.to_json(orient='records'),
            'data' : jsonData
            }, sort_keys=True, indent=4, separators=(',', ': ')
        )
        return HttpResponse(response)
    else:
        # No significant phenotype: return an empty payload with the flag set.
        print "There is no phenotype !"
        response = json.dumps({
            'noResult' : True,
            'phenotypes': [],
            'data' : []
            }, sort_keys=True, indent=4, separators=(',', ': ')
        )
        return HttpResponse(response)
def getAll():
    """Return a dataframe with the names of every binomial phenotype."""
    query = "select nom from phenotypes where type='binomial';"
    return connect.fetchData(query)
def comparison(phenotypes):
    """Return the SNPs significant (p < 0.001) in BOTH of two phenotypes.

    phenotypes -- a sequence of two phenotype names to compare.
    Returns a dataframe of the markers shared by both (inner join on
    rs_id_assoc).

    FIXES over the original, which could not run at all:
    - phenotype1/phenotype2 were referenced before assignment (each was
      used inside its own defining expression -> NameError); they are now
      unpacked from the *phenotypes* argument.
    - the merge referenced undefined names chrarea1/chrarea2 and the
      unimported alias `pd`; it now merges the two fetched frames with
      `pandas` (the module this file imports).
    - the merged result was discarded; it is now returned.
    """
    phenotype1, phenotype2 = phenotypes[0], phenotypes[1]
    # SECURITY NOTE(review): phenotype names are concatenated into the SQL
    # (injection risk) — fetchData only accepts a raw query string.
    snps1 = connect.fetchData("select distinct a.rs_id_assoc,a.chromosome, a.pos, a.allele_A, a.allele_B,m.gene_before,m.gene, m.gene_after from assoc a join marqueurs m on a.rs_id_assoc=m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where a. pvalue_assoc<0.001 and p.nom = '"+phenotype1+"'")
    snps2 = connect.fetchData("select distinct a.rs_id_assoc,a.chromosome, a.pos, a.allele_A, a.allele_B,m.gene_before,m.gene, m.gene_after from assoc a join marqueurs m on a.rs_id_assoc=m.nom JOIN experiment xp on a.experiment = xp.idexperiment join phenotypes p on xp.phenotype=p.idphenotypes where a. pvalue_assoc<0.001 and p.nom = '"+phenotype2+"'")
    # Inner join keeps only markers significant in both phenotypes.
    data = pandas.merge(snps1, snps2, on='rs_id_assoc', how='inner')
    return data
def getAreaSelectionData(position_min, position_max, chromosome, phenotype):
    """Fetch and annotate the SNPs of *phenotype* inside a chromosome window.

    position_min/position_max -- window bounds (exclusive) on 'pos'.
    chromosome -- chromosome id as a string (concatenated into SQL).
    phenotype  -- phenotype name whose associations are fetched.
    Returns the association rows outer-merged with the marker annotations,
    with derived columns risk_allele, risk_af and risk_allele_beta.
    """
    # SECURITY NOTE(review): all arguments are concatenated into the SQL
    # strings (injection risk) — fetchData only accepts raw query strings.
    #We query the database for the relevant snps around our selected position
    sqlQuery = "select distinct a.rs_id_assoc , a.chromosome,a.pos,a.info_assoc,a.pvalue_assoc,a.allele_A,a.allele_B,a.cohort_AA,a.cohort_BB, a.cohort_AB, a.beta_assoc,a.maf, a.all_OR,xp.covariates,p.Risk_on_rise,dat.name from assoc a join experiment xp on a.experiment=xp.idexperiment join dataset dat on xp.dataset=dat.iddataset join phenotypes p on xp.phenotype=p.idphenotypes where p.nom='"+phenotype+"' and a.chromosome="+chromosome+" limit 1000000;"
    snps = connect.fetchData(sqlQuery)
    UserLogs.add(
        'Victor Dupuy',
        '255.255.255.255',
        'accessed the module : AreaSelection',
        'MySQL Database',
        ['assoc', 'phenotypes', 'experiment']
    )
    # Keep only the rows strictly inside the selected window.
    snps = snps[(snps['pos']>position_min) & (snps['pos']<position_max)]
    # The allele that raises risk is allele_B when (beta>0 and Risk_on_rise)
    # or (beta<0 and not Risk_on_rise); otherwise allele_A. The same mask
    # drives all three derived columns, so compute it once (the original
    # rebuilt it three times).
    risk_is_allele_B = numpy.logical_or(
        numpy.logical_and(snps['beta_assoc'] > 0, snps['Risk_on_rise'] == 1),
        numpy.logical_and(snps['beta_assoc'] < 0, snps['Risk_on_rise'] == 0)
    )
    snps["risk_allele"] = numpy.where(risk_is_allele_B, snps['allele_B'], snps['allele_A'])  # select risk allele
    # Allele frequency of the risk allele from the genotype cohort counts.
    total_alleles = (2*snps["cohort_AA"])+(2*snps["cohort_AB"])+(2*snps["cohort_BB"])
    snps["risk_af"] = numpy.where(
        risk_is_allele_B,
        ((2*snps["cohort_BB"])+snps["cohort_AB"])/total_alleles,
        ((2*snps["cohort_AA"])+snps["cohort_AB"])/total_alleles
    )  # calculate allele frequency for each allele
    # Re-sign beta so it is always expressed relative to the risk allele.
    snps["risk_allele_beta"] = numpy.where(risk_is_allele_B, snps['beta_assoc'], snps['beta_assoc']*-1)
    # NOTE(review): the query above selects no column named 'nom', so this
    # rename is a no-op — kept for parity with the original; confirm intent.
    snps.rename(columns = {'nom' : 'rs_id_assoc'}, inplace=True)
    sqlQuery2 = ("select m.nom, m.gene, m.gene_before, m.gene_after, m.end_gen_after,m.end_gen,m.start_gen,m.end_gen_before,m.func,m.position,m.start_gen_after,m.start_gen_before, m.observed "
                 +" from marqueurs m where m.chromosome="+chromosome+" and position between "+str(position_min)+" and "+str(position_max))
    lastSnps= connect.fetchData(sqlQuery2)
    UserLogs.add(
        'Victor Dupuy',
        '255.255.255.255',
        'accessed the module : AreaSelection',
        'MySQL Database',
        ['marqueurs']
    )
    lastSnps.rename(columns = {'nom':'rs_id_assoc'}, inplace=True)
    # FIX: sort_values returns a new frame; the original discarded the
    # result, so the annotations were never actually sorted by position.
    lastSnps = lastSnps.sort_values(by="position")
    pandas.set_option('display.max_colwidth',-1) #important to make links appear in pandas dataframe
    # Then we merge the two dataframes together (outer keeps unmatched rows
    # from both the association and annotation sides).
    snps = pandas.merge(snps,lastSnps,on='rs_id_assoc',how='outer')
    del snps["pos"]
    return snps
def getAreaSelectionData(position_min, position_max, chromosome, phenotype):
    """Fetch and annotate the SNPs of *phenotype* inside a chromosome window
    (formatted duplicate of the other getAreaSelectionData in this file).

    position_min/position_max -- window bounds (exclusive) on 'pos'.
    chromosome -- chromosome id as a string (concatenated into SQL).
    phenotype  -- phenotype name whose associations are fetched.
    Returns the association rows outer-merged with the marker annotations,
    with derived columns risk_allele, risk_af and risk_allele_beta.
    """
    # SECURITY NOTE(review): all arguments are concatenated into the SQL
    # strings (injection risk) — fetchData only accepts raw query strings.
    #We query the database for the relevant snps around our selected position
    sqlQuery = "select distinct a.rs_id_assoc , a.chromosome,a.pos,a.info_assoc,a.pvalue_assoc,a.allele_A,a.allele_B,a.cohort_AA,a.cohort_BB, a.cohort_AB, a.beta_assoc,a.maf, a.all_OR,xp.covariates,p.Risk_on_rise,dat.name from assoc a join experiment xp on a.experiment=xp.idexperiment join dataset dat on xp.dataset=dat.iddataset join phenotypes p on xp.phenotype=p.idphenotypes where p.nom='" + phenotype + "' and a.chromosome=" + chromosome + " limit 1000000;"
    snps = connect.fetchData(sqlQuery)
    UserLogs.add('Victor Dupuy', '255.255.255.255',
                 'accessed the module : AreaSelection', 'MySQL Database',
                 ['assoc', 'phenotypes', 'experiment'])
    # Keep only the rows strictly inside the selected window.
    snps = snps[(snps['pos'] > position_min) & (snps['pos'] < position_max)]
    # The allele that raises risk is allele_B when (beta>0 and Risk_on_rise)
    # or (beta<0 and not Risk_on_rise); otherwise allele_A. The same mask
    # drives all three derived columns, so compute it once (the original
    # rebuilt it three times).
    risk_is_allele_B = numpy.logical_or(
        numpy.logical_and(snps['beta_assoc'] > 0, snps['Risk_on_rise'] == 1),
        numpy.logical_and(snps['beta_assoc'] < 0, snps['Risk_on_rise'] == 0)
    )
    snps["risk_allele"] = numpy.where(risk_is_allele_B, snps['allele_B'], snps['allele_A'])  # select risk allele
    # Allele frequency of the risk allele from the genotype cohort counts.
    total_alleles = (2*snps["cohort_AA"]) + (2*snps["cohort_AB"]) + (2*snps["cohort_BB"])
    snps["risk_af"] = numpy.where(
        risk_is_allele_B,
        ((2*snps["cohort_BB"]) + snps["cohort_AB"]) / total_alleles,
        ((2*snps["cohort_AA"]) + snps["cohort_AB"]) / total_alleles
    )  # calculate allele frequency for each allele
    # Re-sign beta so it is always expressed relative to the risk allele.
    snps["risk_allele_beta"] = numpy.where(risk_is_allele_B, snps['beta_assoc'], snps['beta_assoc']*-1)
    # NOTE(review): the query above selects no column named 'nom', so this
    # rename is a no-op — kept for parity with the original; confirm intent.
    snps.rename(columns={'nom': 'rs_id_assoc'}, inplace=True)
    sqlQuery2 = (
        "select m.nom, m.gene, m.gene_before, m.gene_after, m.end_gen_after,m.end_gen,m.start_gen,m.end_gen_before,m.func,m.position,m.start_gen_after,m.start_gen_before, m.observed "
        + " from marqueurs m where m.chromosome=" + chromosome +
        " and position between " + str(position_min) + " and " +
        str(position_max))
    lastSnps = connect.fetchData(sqlQuery2)
    UserLogs.add('Victor Dupuy', '255.255.255.255',
                 'accessed the module : AreaSelection', 'MySQL Database',
                 ['marqueurs'])
    lastSnps.rename(columns={'nom': 'rs_id_assoc'}, inplace=True)
    # FIX: sort_values returns a new frame; the original discarded the
    # result, so the annotations were never actually sorted by position.
    lastSnps = lastSnps.sort_values(by="position")
    pandas.set_option('display.max_colwidth', -1)  #important to make links appear in pandas dataframe
    # Then we merge the two dataframes together (outer keeps unmatched rows
    # from both the association and annotation sides).
    snps = pandas.merge(snps, lastSnps, on='rs_id_assoc', how='outer')
    del snps["pos"]
    return snps
def getAll():
    """Fetch the name of every binomial phenotype as a dataframe."""
    phenotype_names = connect.fetchData(
        "select nom from phenotypes where type='binomial';")
    return phenotype_names