Beispiel #1
0
def gen_covariates_file(this_dataset, covariates):
    """Write a tab-separated covariates file for the dataset's group.

    ``covariates`` is a comma-separated string of ``trait:dataset`` pairs.
    Each pair becomes one column; each sample in the group's sample list
    becomes one row.  Samples without a value for a covariate get "-9".
    The file is written to
    ``<flat_files('mapping')>/<group name>_covariates.txt``.
    """
    columns = []
    for pair in covariates.split(","):
        fields = pair.split(":")
        trait_name = fields[0]
        trait_ob = GeneralTrait(dataset=create_dataset(fields[1]),
                                name=trait_name,
                                cellid=None)

        # Refresh the group's sample list before reading it.
        this_dataset.group.get_samplelist()
        sample_names = this_dataset.group.samplelist
        logger.debug("SAMPLES:", sample_names)
        values_by_sample = trait_ob.data
        logger.debug("SAMPLE DATA:", values_by_sample)

        columns.append([
            values_by_sample[name].value if name in values_by_sample else "-9"
            for name in sample_names
        ])

    out_path = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                             this_dataset.group.name)
    with open(out_path, "w") as outfile:
        # The first column determines the number of rows.
        for row in range(len(columns[0])):
            for column in columns:
                outfile.write(str(column[row]) + "\t")
            outfile.write("\n")
Beispiel #2
0
def find_outliers(vals):
    """Return the (upper, lower) outlier bounds for a set of sample values.

    The bounds are the 75th and 25th percentiles pushed outwards by
    1.5 times the distance between them.

    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
    (11.252500000000001, 0.5364999999999993)

    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
    (32.0, -8.0)

    With no values there is nothing to compute, so both bounds are None
    and the caller has to handle that.
    >>> find_outliers([])
    (None, None)

    """

    logger.debug("xerxes vals is:", pf(vals))

    # Local names are deliberately unchanged: locals() is dumped to the
    # debug log below, so renaming them would change the log output.
    if not vals:
        upper_bound, lower_bound = None, None
    else:
        stats = corestats.Stats(vals)
        low_hinge = stats.percentile(25)
        up_hinge = stats.percentile(75)
        hstep = 1.5 * (up_hinge - low_hinge)
        upper_bound, lower_bound = up_hinge + hstep, low_hinge - hstep

    logger.debug(pf(locals()))
    return upper_bound, lower_bound
Beispiel #3
0
def search_page():
    """Render the search results page, using the Redis cache when enabled.

    When USE_REDIS is set, the result is looked up under a key derived
    from the request arguments.  On a hit the cached result is used
    as-is; previously it was unpickled and then overwritten by a fresh
    search, so the cache never saved any work.  On a miss the search is
    run, and a valid result is cached with a 60*60 expiry.

    Returns the rendered "search_result_page.html" for a valid search,
    otherwise "search_error.html".
    """
    logger.info("in search_page")
    logger.info(request.url)
    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE(security): pickle.loads will execute whatever the
                # payload says; this is only safe while the Redis
                # instance is trusted and not writable by outsiders.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    logger.info("request.args is", request.args)
    cache_hit = result is not None
    if not cache_hit:
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__
    valid_search = result['search_term_exists']

    logger.debugf("result", result)

    # Only freshly computed, valid results are stored; a hit keeps its
    # original expiry so stale entries still age out.
    if USE_REDIS and valid_search and not cache_hit:
        Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60*60)

    if valid_search:
        return render_template("search_result_page.html", **result)
    else:
        return render_template("search_error.html")
Beispiel #4
0
def find_outliers(vals):
    """Compute upper and lower outlier bounds for sample/case values.

    Each bound lies 1.5 times the 25th-to-75th percentile distance
    beyond the corresponding percentile.

    >>> find_outliers([3.504, 5.234, 6.123, 7.234, 3.542, 5.341, 7.852, 4.555, 12.537])
    (11.252500000000001, 0.5364999999999993)

    >>> find_outliers([9,12,15,17,31,50,7,5,6,8])
    (32.0, -8.0)

    An empty input yields None for both bounds; callers must cope.
    >>> find_outliers([])
    (None, None)

    """

    logger.debug("xerxes vals is:", pf(vals))

    # Default for the "no values" case.  Local names are kept as they
    # are because locals() is written to the debug log further down.
    upper_bound = lower_bound = None

    if vals:
        stats = corestats.Stats(vals)
        low_hinge = stats.percentile(25)
        up_hinge = stats.percentile(75)
        hstep = (up_hinge - low_hinge) * 1.5

        upper_bound = up_hinge + hstep
        lower_bound = low_hinge - hstep

    logger.debug(pf(locals()))
    return upper_bound, lower_bound
Beispiel #5
0
    def trim_results(self, p_values):
        """Return at most the 500 largest entries of ``p_values``.

        A list longer than 500 is sorted in place in descending order
        and its first 500 entries are returned.  A list of 500 or fewer
        entries is returned unchanged; this path used to raise
        UnboundLocalError because the result was never assigned.
        """
        logger.debug("len_p_values:", len(p_values))
        if len(p_values) <= 500:
            return p_values

        p_values.sort(reverse=True)
        return p_values[:500]
Beispiel #6
0
def gen_covariates_file(this_dataset, covariates):
    """Dump covariate values for every sample of the group to a text file.

    ``covariates`` holds comma-separated ``trait:dataset`` entries.  The
    output has one tab-terminated column per entry and one line per
    sample of ``this_dataset.group``; a missing value is written as "-9".
    """

    def value_or_missing(sample_data, sample):
        # "-9" marks a sample that has no value for this covariate.
        if sample not in sample_data:
            return "-9"
        return sample_data[sample].value

    per_covariate_values = []
    for entry in covariates.split(","):
        pieces = entry.split(":")
        name_part = pieces[0]
        covariate_dataset = create_dataset(pieces[1])
        covariate_trait = GeneralTrait(dataset=covariate_dataset,
                                       name=name_part,
                                       cellid=None)

        this_dataset.group.get_samplelist()
        samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", samples)
        sample_data = covariate_trait.data
        logger.debug("SAMPLE DATA:", sample_data)

        values = []
        for sample in samples:
            values.append(value_or_missing(sample_data, sample))
        per_covariate_values.append(values)

    target = "{}/{}_covariates.txt".format(flat_files('mapping'),
                                           this_dataset.group.name)
    with open(target, "w") as outfile:
        row_count = len(per_covariate_values[0])
        for row_index in range(row_count):
            for values in per_covariate_values:
                outfile.write(str(values[row_index]) + "\t")
            outfile.write("\n")
Beispiel #7
0
def loading_page():
    """Render "loading.html" with the whitelisted fields of the posted form.

    Only form keys listed in ``wanted``, or starting with "value:", are
    passed to the template, as the ``start_vars`` dict.
    """
    logger.info(request.url)
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    wanted = ('temp_uuid', 'trait_id', 'dataset', 'method', 'trimmed_markers',
              'selected_chr', 'chromosomes', 'mapping_scale', 'score_type',
              'suggestive', 'significant', 'num_perm', 'permCheck',
              'perm_output', 'num_bootstrap', 'bootCheck', 'bootstrap_results',
              'LRSCheck', 'covariates', 'maf', 'use_loco', 'manhattan_plot',
              'control_marker', 'control_marker_db', 'do_control', 'genofile',
              'pair_scan', 'startMb', 'endMb', 'graphWidth', 'lrsMax',
              'additiveCheck', 'showSNP', 'showGenes', 'viewLegend',
              'haplotypeAnalystCheck', 'mapmethod_rqtl_geno',
              'mapmodel_rqtl_geno')

    # .items() is available on Python 2 and 3 (and is already used for
    # the debug call above); .iteritems() exists on Python 2 only.
    start_vars = {
        key: value
        for key, value in initial_start_vars.items()
        if key in wanted or key.startswith('value:')
    }

    return render_template("loading.html", start_vars=start_vars)
def export_mapping_results(dataset, trait, markers, results_path, mapping_scale, score_type):
    """Write mapping results to ``results_path`` as comma-separated text.

    The file starts with a short description of the population and data
    set (plus gene symbol and location for "ProbeSet" datasets), then a
    blank line, a column header, and one line per marker.  The last
    marker line has no trailing newline.

    ``markers`` is a sequence of dicts with 'name', 'chr', 'Mb' and
    either 'lod_score' or 'lrs_value'; 'additive' and 'dominance' are
    optional.  An empty ``markers`` now produces a header-only file; it
    used to raise IndexError after part of the file had been written.

    NOTE(review): the position column is labelled "Cm" unless
    mapping_scale is "physic", but the value is always marker['Mb'];
    confirm this is intended.
    """
    with open(results_path, "w+") as output_file:
        output_file.write("Population: " + dataset.group.species.title() + " " + dataset.group.name + "\n")
        output_file.write("Data Set: " + dataset.fullname + "\n")
        if dataset.type == "ProbeSet":
            output_file.write("Gene Symbol: " + trait.symbol + "\n")
            output_file.write("Location: " + str(trait.chr) + " @ " + str(trait.mb) + " Mb\n")
        output_file.write("\n")

        # Optional header columns follow the first marker, if there is one.
        first_marker = markers[0] if markers else {}
        header = ["Name", "Chr",
                  "Mb" if mapping_scale == "physic" else "Cm",
                  score_type]
        if "additive" in first_marker:
            header.append("Additive")
        if "dominance" in first_marker:
            header.append("Dominance")
        output_file.write(",".join(header) + "\n")

        for i, marker in enumerate(markers):
            logger.debug("THE MARKER:", marker)
            # The newline goes before every row but the first, so the
            # file does not end with a newline.
            if i > 0:
                output_file.write("\n")
            if "lod_score" in marker:
                score = marker['lod_score']
            else:
                score = marker['lrs_value']
            fields = [marker['name'], str(marker['chr']), str(marker['Mb']),
                      str(score)]
            for optional in ("additive", "dominance"):
                if optional in marker:
                    fields.append(str(marker[optional]))
            output_file.write(",".join(fields))
Beispiel #9
0
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK for a trait and attach the resulting p-values to the markers.

    Writes the phenotype file, runs the PLINK command through the shell,
    parses its output and returns ``dataset.group.markers.markers`` after
    the p-values have been added.
    """
    group = dataset.group
    name_prefix = "%s_%s_"%(group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)

    gen_pheno_txt_file_plink(this_trait, dataset, vals, pheno_filename = plink_output_filename)

    command_args = (
        PLINK_PATH, group.name, PLINK_PATH, group.name,
        TMPDIR, plink_output_filename, this_trait.name, maf, TMPDIR,
        plink_output_filename)
    plink_command = PLINK_COMMAND + ' --noweb --ped %s/%s.ped --no-fid --no-parents --no-sex --no-pheno --map %s/%s.map --pheno %s%s.txt --pheno-name %s --maf %s --missing-phenotype -9999 --out %s%s --assoc ' % command_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # The first element of the parse result is not used here.
    _, p_values = parse_plink_output(plink_output_filename, species)

    logger.debug("p_values:", pf(p_values))
    marker_set = dataset.group.markers
    marker_set.add_pvalues(p_values)

    return marker_set.markers
    def trim_results(self, p_values):
        """Limit ``p_values`` to its 500 largest entries.

        When there are more than 500 values the list is sorted in place
        (descending) and the top 500 are returned.  Otherwise the list
        is returned untouched; before this fix that case raised
        UnboundLocalError because no result had been assigned.
        """
        logger.debug("len_p_values:", len(p_values))
        if len(p_values) > 500:
            p_values.sort(reverse=True)
            return p_values[:500]

        return p_values
def search_page():
    """Render the search results page, using the Redis cache when enabled.

    With USE_REDIS set, a cached result stored under a key built from
    the request arguments is reused.  The previous code unpickled the
    cached value and then always re-ran the search, discarding it.  On a
    miss the search runs, and a valid result is cached with a 60 * 60
    expiry.

    Returns the rendered "search_result_page.html" when the search term
    exists, otherwise "search_error.html".
    """
    logger.info("in search_page")
    logger.info(request.url)
    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + \
                json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE(security): unpickling runs code embedded in the
                # payload; only safe while Redis itself is trusted.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    logger.info("request.args is", request.args)
    cache_hit = result is not None
    if not cache_hit:
        the_search = SearchResultPage(request.args)
        result = the_search.__dict__
    valid_search = result['search_term_exists']

    # Store only new, valid results; a hit keeps its original expiry.
    if USE_REDIS and valid_search and not cache_hit:
        Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
        Redis.expire(key, 60 * 60)

    if valid_search:
        return render_template("search_result_page.html", **result)
    else:
        return render_template("search_error.html")
Beispiel #12
0
def connect_db():
    """Attach a SQLAlchemy engine to ``g`` unless one is already there.

    The engine is stored under both ``g.db`` and ``g._database``; the
    latter is what is checked to decide whether a new one is needed.
    """
    logger.info("@app.before_request connect_db")
    if getattr(g, '_database', None) is not None:
        return

    logger.debug("Get new database connector")
    new_engine = sqlalchemy.create_engine(SQL_URI, encoding="latin1")
    g.db = new_engine
    g._database = new_engine
    logger.debug(g.db)
Beispiel #13
0
def init_db():
    """Call ``create_all`` on ``Base.metadata``, bound to ``engine``."""
    # This import exists only for its side effect.  Every module that
    # defines models has to be imported before create_all() so that the
    # models are registered on the metadata.
    import wqflask.model

    logger.debug("Creating all model metadata")
    metadata = Base.metadata
    metadata.create_all(bind=engine)
    logger.info("Done creating all model metadata")
Beispiel #14
0
def init_db():
    """Register the model modules, then run ``Base.metadata.create_all``."""
    # Import here every module that might define models, so they are
    # registered on the metadata before create_all() runs; otherwise
    # callers would have to import them before calling init_db().
    import wqflask.model

    logger.debug("Creating all model metadata")
    create_all = Base.metadata.create_all
    create_all(bind=engine)
    logger.info("Done creating all model metadata")
Beispiel #15
0
def loading_page():
    """Render "loading.html" with the whitelisted fields of the posted form.

    Form keys that appear in ``wanted``, or that start with "value:",
    are copied into a dict handed to the template as ``start_vars``.
    """
    logger.info(request.url)
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:", initial_start_vars.items())
    wanted = (
        'temp_uuid',
        'trait_id',
        'dataset',
        'method',
        'trimmed_markers',
        'selected_chr',
        'chromosomes',
        'mapping_scale',
        'score_type',
        'suggestive',
        'significant',
        'num_perm',
        'permCheck',
        'perm_output',
        'num_bootstrap',
        'bootCheck',
        'bootstrap_results',
        'LRSCheck',
        'covariates',
        'maf',
        'use_loco',
        'manhattan_plot',
        'control_marker',
        'control_marker_db',
        'do_control',
        'genofile',
        'pair_scan',
        'startMb',
        'endMb',
        'graphWidth',
        'lrsMax',
        'additiveCheck',
        'showSNP',
        'showGenes',
        'viewLegend',
        'haplotypeAnalystCheck',
        'mapmethod_rqtl_geno',
        'mapmodel_rqtl_geno'
    )

    # Use .items(), which works on Python 2 and 3 and is already called
    # for the debug line above; .iteritems() is Python 2 only.
    start_vars = {
        key: value
        for key, value in initial_start_vars.items()
        if key in wanted or key.startswith('value:')
    }

    return render_template("loading.html", start_vars=start_vars)
    def get_lod_score_cutoff(self):
        """Return 1 if more than 1000 markers have 'lod_score' > 1, else 0."""
        logger.debug("INSIDE GET LOD CUTOFF")
        markers = self.dataset.group.markers.markers
        high_qtl_count = sum(1 for marker in markers
                             if marker['lod_score'] > 1)
        return 1 if high_qtl_count > 1000 else 0
Beispiel #17
0
    def get_lod_score_cutoff(self):
        """Pick the LOD cutoff from how many markers score above 1.

        Returns 1 when more than 1000 markers have a 'lod_score'
        greater than 1, and 0 otherwise.
        """
        logger.debug("INSIDE GET LOD CUTOFF")
        above_one = [
            marker for marker in self.dataset.group.markers.markers
            if marker['lod_score'] > 1
        ]
        if len(above_one) <= 1000:
            return 0
        return 1
def run_reaper(this_trait, this_dataset, samples, vals, json_data, num_perm, boot_check, num_bootstrap, do_control, control_marker, manhattan_plot, first_run=True, output_files=None):
    """Generates p-values for each marker using qtlreaper

    On a first run the phenotype file is written, the reaper command
    line is assembled (with optional bootstrap, permutation and control
    marker options) and executed through the shell.  Otherwise the
    output file names are taken from ``output_files``, a sequence of
    (output, permutation, bootstrap) names, and only parsing is done.

    Returns ``(marker_obs, permu_vals, suggestive, significant,
    bootstrap_vals, [output_filename, permu_filename,
    bootstrap_filename])``.  Both thresholds are 0 when there are no
    permutation values.
    """

    def random_suffix():
        # Six random uppercase letters/digits, used to build output names.
        return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))

    if first_run:
        group = this_dataset.group
        if group.genofile is not None:
            # Drop the last five characters (presumably the ".geno"
            # extension, which the command below adds back).
            genofile_name = group.genofile[:-5]
        else:
            genofile_name = group.name

        trait_filename = str(this_trait.name) + "_" + str(this_dataset.name) + "_pheno"
        gen_pheno_txt_file(samples, vals, trait_filename)

        output_filename = group.name + "_GWA_" + random_suffix()
        bootstrap_filename = None
        permu_filename = None

        opt_list = []
        if boot_check and num_bootstrap > 0:
            bootstrap_filename = group.name + "_BOOTSTRAP_" + random_suffix()

            opt_list.append("-b")
            opt_list.append("--n_bootstrap " + str(num_bootstrap))
            opt_list.append("--bootstrap_output " + webqtlConfig.GENERATED_IMAGE_DIR + bootstrap_filename + ".txt")
        if num_perm > 0:
            permu_filename = group.name + "_PERM_" + random_suffix()
            opt_list.append("-n " + str(num_perm))
            opt_list.append("--permu_output " + webqtlConfig.GENERATED_IMAGE_DIR + permu_filename + ".txt")
        if control_marker != "" and do_control == "true":
            opt_list.append("-c " + control_marker)

        # NOTE(review): the command is a shell string passed to
        # os.system; control_marker is interpolated unescaped, so
        # confirm it is validated upstream.
        reaper_command = REAPER_COMMAND + ' --geno {0}/{1}.geno --traits {2}/gn2/{3}.txt {4} -o {5}{6}.txt'.format(
            flat_files('genotype'),
            genofile_name,
            TEMPDIR,
            trait_filename,
            " ".join(opt_list),
            webqtlConfig.GENERATED_IMAGE_DIR,
            output_filename)

        logger.debug("reaper_command:" + reaper_command)
        os.system(reaper_command)
    else:
        output_filename, permu_filename, bootstrap_filename = output_files

    marker_obs, permu_vals, bootstrap_vals = parse_reaper_output(output_filename, permu_filename, bootstrap_filename)

    suggestive = 0
    significant = 0
    if len(permu_vals) > 0:
        # Thresholds are read at the 0.37 and 0.95 positions of the
        # permutation values, indexed by num_perm.
        suggestive = permu_vals[int(num_perm*0.37-1)]
        significant = permu_vals[int(num_perm*0.95-1)]

    return marker_obs, permu_vals, suggestive, significant, bootstrap_vals, [output_filename, permu_filename, bootstrap_filename]
Beispiel #19
0
    def __init__(self, dataset, sample_names, this_trait, sample_group_type,
                 header):
        """Build ``self.sample_list`` for one group of samples.

        Each name in ``sample_names`` (with "_2nd_" removed) is looked up
        in ``this_trait.data``.  A missing sample gets a fresh
        webqtlCaseData object so that it still shows up in the table.
        Every sample receives ``extra_info``, a ``this_id`` of the form
        "Primary_<n>" or "Other_<n>" (n counting from 1) and, when extra
        attribute values are present, ``extra_attributes``.
        ``do_outliers()`` is called last.
        """
        self.dataset = dataset
        self.this_trait = this_trait
        self.sample_group_type = sample_group_type  # primary or other
        self.header = header

        self.sample_list = []  # The actual list
        self.sample_attribute_values = {}

        self.get_attributes()

        is_probeset = (self.this_trait and self.dataset
                       and self.dataset.type == 'ProbeSet')
        if is_probeset:
            self.get_extra_attribute_values()

        # Sample names in the AXBXA group that link to the mouse cross page.
        linked_axbxa_samples = ('AXB18/19/20', 'AXB13/14', 'BXA8/17')

        for counter, raw_name in enumerate(sample_names, 1):
            sample_name = raw_name.replace("_2nd_", "")

            # ZS - create the sample object when the trait has no value
            # for it, so it is still displayed in the table
            try:
                sample = self.this_trait.data[sample_name]
            except KeyError:
                logger.debug("No sample %s, let's create it now" % sample_name)
                sample = webqtlCaseData.webqtlCaseData(sample_name)

            sample.extra_info = {}
            if self.dataset.group.name == 'AXBXA':
                if sample_name in linked_axbxa_samples:
                    sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
                    sample.extra_info['css_class'] = "fs12"

            id_prefix = ("Primary_" if sample_group_type == 'primary'
                         else "Other_")
            sample.this_id = id_prefix + str(counter)

            # For extra attribute columns; currently only used by
            # several datasets - Zach
            if self.sample_attribute_values:
                sample.extra_attributes = self.sample_attribute_values.get(
                    sample_name, {})
                logger.debug("sample.extra_attributes is",
                             pf(sample.extra_attributes))

            self.sample_list.append(sample)

        logger.debug("self.attributes is", pf(self.attributes))

        self.do_outliers()
        logger.debug("*the_samples are [%i]: %s" %
                     (len(self.sample_list), pf(self.sample_list)))
Beispiel #20
0
    def __init__(self,
                 dataset,
                 sample_names,
                 this_trait,
                 sample_group_type,
                 header):
        """Collect the sample objects of one group into ``self.sample_list``.

        For each entry of ``sample_names`` the "_2nd_" marker is stripped
        and the sample is taken from ``this_trait.data``, or created as a
        new webqtlCaseData if the trait has none.  Each sample is then
        given ``extra_info``, ``this_id`` ("Primary_<n>" or "Other_<n>")
        and, if any extra attribute values exist, ``extra_attributes``.
        ``do_outliers()`` runs once all samples are collected.
        """
        self.dataset = dataset
        self.this_trait = this_trait
        self.sample_group_type = sample_group_type    # primary or other
        self.header = header

        self.sample_list = [] # The actual list
        self.sample_attribute_values = {}

        self.get_attributes()

        if self.this_trait and self.dataset and self.dataset.type == 'ProbeSet':
            self.get_extra_attribute_values()

        for position, listed_name in enumerate(sample_names, 1):
            sample_name = listed_name.replace("_2nd_", "")

            # ZS - samples with no value still need an object so they
            # are displayed in the table
            try:
                sample = self.this_trait.data[sample_name]
            except KeyError:
                logger.debug("No sample %s, let's create it now" % sample_name)
                sample = webqtlCaseData.webqtlCaseData(sample_name)

            sample.extra_info = {}
            links_to_cross_page = (
                self.dataset.group.name == 'AXBXA'
                and sample_name in ('AXB18/19/20','AXB13/14','BXA8/17'))
            if links_to_cross_page:
                sample.extra_info['url'] = "/mouseCross.html#AXB/BXA"
                sample.extra_info['css_class'] = "fs12"

            if sample_group_type != 'primary':
                sample.this_id = "Other_" + str(position)
            else:
                sample.this_id = "Primary_" + str(position)

            # Extra attribute columns; currently only used by several
            # datasets - Zach
            if self.sample_attribute_values:
                extra_attributes = self.sample_attribute_values.get(sample_name, {})
                sample.extra_attributes = extra_attributes
                logger.debug("sample.extra_attributes is", pf(sample.extra_attributes))

            self.sample_list.append(sample)

        logger.debug("self.attributes is", pf(self.attributes))

        self.do_outliers()
        sample_total = len(self.sample_list)
        logger.debug("*the_samples are [%i]: %s" % (sample_total, pf(self.sample_list)))
def run_plink(this_trait, dataset, species, vals, maf):
    """Run PLINK on the group's binary files and add p-values to its markers.

    Writes the phenotype file, runs the PLINK command through the shell,
    parses the output and returns ``dataset.group.markers.markers`` once
    the p-values have been added.
    """
    name_prefix = f"{dataset.group.name}_{this_trait.name}_"
    plink_output_filename = webqtlUtil.genRandStr(name_prefix)
    gen_pheno_txt_file(dataset, vals)

    plink_command = f"{PLINK_COMMAND}  --noweb --bfile {flat_files('mapping')}/{dataset.group.name} --no-pheno --no-fid --no-parents --no-sex --maf {maf} --out { TMPDIR}{plink_output_filename} --assoc "
    logger.debug("plink_command:", plink_command)
    os.system(plink_command)

    # Only the p-values are needed from the parse result.
    _, p_values = parse_plink_output(plink_output_filename, species)
    logger.debug("p_values:", p_values)

    marker_set = dataset.group.markers
    marker_set.add_pvalues(p_values)
    return marker_set.markers
def run_plink(this_trait, dataset, species, vals, maf):
    """Execute PLINK for ``dataset`` and return the markers with p-values.

    The phenotype file is generated first; the command is then run via
    the shell and its output parsed into p-values, which are added to
    ``dataset.group.markers`` before its ``markers`` are returned.
    """
    group_name = dataset.group.name
    plink_output_filename = webqtlUtil.genRandStr("%s_%s_"%(group_name, this_trait.name))
    gen_pheno_txt_file(dataset, vals)

    command_args = (flat_files('mapping'), group_name, maf, TMPDIR, plink_output_filename)
    plink_command = PLINK_COMMAND + ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % command_args
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    parsed = parse_plink_output(plink_output_filename, species)
    count, p_values = parsed  # count is not used any further

    logger.debug("p_values:", p_values)
    dataset.group.markers.add_pvalues(p_values)

    return dataset.group.markers.markers
Beispiel #23
0
    def gen_human_results(self, pheno_vector, key, temp_uuid):
        """Run the external pylmm command for a human dataset.

        The phenotype vector is reshaped into a column, paired with a
        covariate matrix of ones and with the kinship matrix read from
        ``<file_base>.kin``.  These parameters are stored in Redis under
        ``key`` (expiry 60 * 60) as JSON, the pylmm command is run
        through the shell, and the result is popped from the Redis list
        "pylmm:results:<temp_uuid>" with a timeout of 45 * 60.

        Returns ``(p_values, t_stats)`` from the JSON result.
        """
        file_base = locate(self.dataset.group.name, "mapping")

        plink_input = input.plink(file_base, type='b')
        input_file_name = os.path.join(webqtlConfig.SNP_PATH,
                                       self.dataset.group.name + ".snps.gz")

        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0], 1))
        # Close the kinship file once it is read; the handle used to be
        # left open.
        with open(file_base + '.kin', 'r') as kinship_file:
            kinship_matrix = np.fromfile(kinship_file, sep=" ")
        # The flat values become a square individuals x individuals matrix.
        kinship_matrix.resize(
            (len(plink_input.indivs), len(plink_input.indivs)))

        logger.debug("Before creating params")

        params = dict(
            pheno_vector=pheno_vector.tolist(),
            covariate_matrix=covariate_matrix.tolist(),
            input_file_name=input_file_name,
            kinship_matrix=kinship_matrix.tolist(),
            refit=False,
            temp_uuid=temp_uuid,

            # meta data
            timestamp=datetime.datetime.now().isoformat(),
        )

        logger.debug("After creating params")

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        Redis.expire(key, 60 * 60)

        logger.debug("Before creating the command")

        command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "human")

        logger.debug("command is:", command)

        os.system(command)

        # NOTE(review): if blpop returns None on timeout, the indexing
        # below raises TypeError; confirm callers expect that.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
        results = json.loads(json_results[1])
        t_stats = results['t_stats']
        p_values = results['p_values']

        return p_values, t_stats
Beispiel #24
0
    def gen_human_results(self, pheno_vector, key, temp_uuid):
        """Compute p-values and t-statistics for a human dataset via pylmm.

        Builds the pylmm parameters (column phenotype vector, all-ones
        covariate matrix, kinship matrix from ``<file_base>.kin``),
        stores them as JSON in Redis under ``key`` with an expiry of
        60*60, runs the pylmm command through the shell, and pops the
        result from the Redis list "pylmm:results:<temp_uuid>" with a
        timeout of 45*60.

        Returns ``(p_values, t_stats)``.
        """
        file_base = locate(self.dataset.group.name,"mapping")

        plink_input = input.plink(file_base, type='b')
        input_file_name = os.path.join(webqtlConfig.SNP_PATH, self.dataset.group.name + ".snps.gz")

        pheno_vector = pheno_vector.reshape((len(pheno_vector), 1))
        covariate_matrix = np.ones((pheno_vector.shape[0],1))
        # Read inside a context manager; the file object used to be
        # opened inline and never closed.
        with open(file_base + '.kin','r') as kinship_file:
            kinship_matrix = np.fromfile(kinship_file,sep=" ")
        # Reshape the flat values into a square matrix over the individuals.
        kinship_matrix.resize((len(plink_input.indivs),len(plink_input.indivs)))

        logger.debug("Before creating params")

        params = dict(pheno_vector = pheno_vector.tolist(),
                    covariate_matrix = covariate_matrix.tolist(),
                    input_file_name = input_file_name,
                    kinship_matrix = kinship_matrix.tolist(),
                    refit = False,
                    temp_uuid = temp_uuid,

                    # meta data
                    timestamp = datetime.datetime.now().isoformat(),
                    )

        logger.debug("After creating params")

        json_params = json.dumps(params)
        Redis.set(key, json_params)
        Redis.expire(key, 60*60)

        logger.debug("Before creating the command")

        command = PYLMM_COMMAND+' --key {} --species {}'.format(key, "human")

        logger.debug("command is:", command)

        os.system(command)

        # NOTE(review): a None return from blpop (timeout) makes the
        # indexing below raise TypeError; confirm callers expect that.
        json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45*60)
        results = json.loads(json_results[1])
        t_stats = results['t_stats']
        p_values = results['p_values']

        return p_values, t_stats
Beispiel #25
0
def check_access_permissions():
    """Redirect to the no-access page when the dataset may not be viewed.

    Only requests with a 'dataset' argument are checked.  Permissions
    start as DEFAULT_PRIVILEGES.  For a dataset not named "Temp" and not
    of type "Temp", they are looked up for the trait when 'trait_id' is
    given, or for the whole dataset when its type is not "Publish".

    Returns a redirect when 'view' is missing from the 'data'
    permissions, otherwise None.
    """
    logger.debug("@app.before_request check_access_permissions")
    if 'dataset' not in request.args:
        return None

    permissions = DEFAULT_PRIVILEGES
    dataset_name = request.args['dataset']
    if dataset_name != "Temp":
        dataset = create_dataset(dataset_name)

        if dataset.type != "Temp":
            if 'trait_id' in request.args:
                permissions = check_resource_availability(
                    dataset, request.args['trait_id'])
            elif dataset.type != "Publish":
                permissions = check_resource_availability(dataset)

    if 'view' in permissions['data']:
        return None
    return redirect(url_for("no_access_page"))
Beispiel #26
0
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the markers with p-values.

    Steps: generate the phenotype file, run the PLINK command through
    the shell, parse the output, add the p-values to
    ``dataset.group.markers`` and return its ``markers``.
    """
    random_name_seed = "%s_%s_" % (dataset.group.name, this_trait.name)
    plink_output_filename = webqtlUtil.genRandStr(random_name_seed)
    gen_pheno_txt_file(dataset, vals)

    plink_options = ' --noweb --bfile %s/%s --no-pheno --no-fid --no-parents --no-sex --maf %s --out %s%s --assoc ' % (
        flat_files('mapping'), dataset.group.name, maf, TMPDIR,
        plink_output_filename)
    plink_command = PLINK_COMMAND + plink_options
    logger.debug("plink_command:", plink_command)

    os.system(plink_command)

    # The first item of the parse result is ignored here.
    _unused_count, p_values = parse_plink_output(plink_output_filename,
                                                 species)

    logger.debug("p_values:", p_values)
    group_markers = dataset.group.markers
    group_markers.add_pvalues(p_values)

    return group_markers.markers
Beispiel #27
0
    def get_attributes(self):
        """Finds which extra attributes apply to this dataset

        Fills ``self.attributes`` with one Bunch per case attribute id,
        holding the attribute ``name`` and its naturally sorted
        ``distinct_values``.
        """

        def id_and_name(row):
            # Grouping key: consecutive rows of one attribute share it.
            return (row.Id, row.Name)

        # Get attribute names and distinct values for each attribute
        query = '''
                        SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttributeXRef.Value
                        FROM CaseAttribute, CaseAttributeXRef
                        WHERE CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id
                        AND CaseAttributeXRef.ProbeSetFreezeId = %s
                        ORDER BY CaseAttribute.Name'''
        results = g.db.execute(query, (str(self.dataset.id),))

        self.attributes = {}
        for (key, name), rows in itertools.groupby(results.fetchall(), id_and_name):
            logger.debug("radish: %s - %s" % (key, name))
            attribute = Bunch()
            self.attributes[key] = attribute
            attribute.name = name
            attribute.distinct_values = sorted(
                (row.Value for row in rows), key=natural_sort_key)
Beispiel #28
0
def create_marker_covariates(control_marker, cross):
    """Return the genotype columns of the requested control markers.

    ``control_marker`` is a comma-separated string of marker names in
    which spaces are ignored.  Names that are not columns of the cross's
    genotype matrix are dropped on the R side.

    The names are now handed to R as a string vector, not spliced into
    R source code.  Quotes in user input can therefore no longer inject
    R code, and empty names (for example from an empty string) are
    skipped.

    Returns the R object ``marker_covars``.
    """
    ro.globalenv["the_cross"] = cross
    ro.r('genotypes <- pull.geno(the_cross)')  # Get the genotype matrix
    marker_names = [
        name for name in control_marker.replace(" ", "").split(",") if name
    ]
    logger.debug(marker_names)
    # Pass the names as data, never as code.  An empty list becomes an
    # empty character vector, which the filtering below handles.
    ro.globalenv["covnames"] = ro.StrVector(marker_names)
    ro.r('covInGeno <- which(covnames %in% colnames(genotypes))')
    ro.r('covnames <- covnames[covInGeno]')
    ro.r("cat('covnames (purged): ', covnames,'\n')")
    # Get the covariate matrix by using the marker name as index to the genotype file
    ro.r('marker_covars <- genotypes[,covnames]')
    # TODO: Create a design matrix from the marker covars for the markers in case of an F2, 4way, etc
    return ro.r["marker_covars"]
Beispiel #29
0
def search_page():
    """Render the search result page (or the data-sharing page).

    When ``info_database`` is among the request arguments the data-sharing
    page is shown, or the client is redirected if ``sharing_info_page()``
    supplies a redirect URL.  Otherwise a search is run for the request
    arguments.  With ``USE_REDIS`` enabled, results are cached for one hour
    under a key derived from the request arguments.
    """
    logger.info("in search_page")
    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        return render_template("data_sharing.html",
                               **template_vars.__dict__)

    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + json.dumps(request.args,
                                                    sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE: unpickling is only safe as long as this Redis
                # instance holds nothing but data written by this app.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    if result is None:
        # Cache miss (or cache disabled): run the search.  Previously the
        # search ran unconditionally and overwrote the cached result.
        logger.info("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__

        if USE_REDIS:
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60 * 60)

    logger.debugf("result", result)

    if result['search_term_exists']:
        return render_template("search_result_page.html", **result)
    return render_template("search_error.html")
Beispiel #30
0
def search_page():
    """Render the search result page (or the data-sharing page).

    When ``info_database`` is among the request arguments the data-sharing
    page is shown, or the client is redirected if ``sharing_info_page()``
    supplies a redirect URL.  Otherwise a search is run for the request
    arguments.  With ``USE_REDIS`` enabled, results are cached for one hour
    under a key derived from the request arguments.
    """
    logger.info("in search_page")
    logger.error(request.url)
    if 'info_database' in request.args:
        logger.info("Going to sharing_info_page")
        template_vars = sharing_info_page()
        if template_vars.redirect_url:
            logger.info("Going to redirect")
            return flask.redirect(template_vars.redirect_url)
        return render_template("data_sharing.html", **template_vars.__dict__)

    result = None
    if USE_REDIS:
        with Bench("Trying Redis cache"):
            key = "search_results:v1:" + json.dumps(request.args, sort_keys=True)
            logger.debug("key is:", pf(key))
            cached = Redis.get(key)
            if cached:
                logger.info("Redis cache hit on search results!")
                # NOTE: unpickling is only safe as long as this Redis
                # instance holds nothing but data written by this app.
                result = pickle.loads(cached)
    else:
        logger.info("Skipping Redis cache (USE_REDIS=False)")

    if result is None:
        # Cache miss (or cache disabled): run the search.  Previously the
        # search ran unconditionally and overwrote the cached result.
        logger.info("request.args is", request.args)
        the_search = search_results.SearchResultPage(request.args)
        result = the_search.__dict__

        if USE_REDIS:
            Redis.set(key, pickle.dumps(result, pickle.HIGHEST_PROTOCOL))
            Redis.expire(key, 60*60)

    logger.debugf("result", result)

    if result['search_term_exists']:
        return render_template("search_result_page.html", **result)
    return render_template("search_error.html")
    def run_plink(self):
        """Run a PLINK association scan for this trait and return the markers.

        Writes the phenotype file via ``gen_pheno_txt_file_plink``, runs
        PLINK with ``--assoc``, parses its output and attaches the resulting
        p-values to the group's markers.

        Returns ``self.dataset.group.markers.markers``.
        """
        # Imported locally so the method does not depend on file-level imports.
        import shlex
        import subprocess

        group_name = self.dataset.group.name
        plink_output_filename = webqtlUtil.genRandStr("%s_%s_" % (group_name, self.this_trait.name))

        self.gen_pheno_txt_file_plink(pheno_filename=plink_output_filename)

        # Build an argument list and run PLINK without a shell: group name,
        # trait name and maf are interpolated into the command, and maf comes
        # from the request, so a shell string would allow command injection.
        plink_args = shlex.split(PLINK_COMMAND) + [
            '--noweb',
            '--ped', '%s/%s.ped' % (PLINK_PATH, group_name),
            '--no-fid', '--no-parents', '--no-sex', '--no-pheno',
            '--map', '%s/%s.map' % (PLINK_PATH, group_name),
            '--pheno', '%s%s.txt' % (TMPDIR, plink_output_filename),
            '--pheno-name', '%s' % (self.this_trait.name,),
            '--maf',
        ]
        # An empty maf used to vanish when the shell split the command line,
        # leaving a bare --maf; keep that by only adding a non-empty value.
        maf_value = ('%s' % (self.maf,)).strip()
        if maf_value:
            plink_args.append(maf_value)
        plink_args += [
            '--missing-phenotype', '-9999',
            '--out', '%s%s' % (TMPDIR, plink_output_filename),
            '--assoc',
        ]
        logger.debug("plink_command:", " ".join(plink_args))

        # The exit status is ignored, as it was with os.system().
        subprocess.call(plink_args)

        count, p_values = self.parse_plink_output(plink_output_filename)

        logger.debug("p_values:", pf(p_values))

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers
def run_plink(this_trait, dataset, species, vals, maf):
    """Run a PLINK association scan and return the group's markers.

    this_trait -- trait object; its ``name`` is used in the output file name
    dataset    -- dataset whose group supplies the genotype files and markers
    species    -- passed through to ``parse_plink_output``
    vals       -- phenotype values, written out by ``gen_pheno_txt_file``
    maf        -- minor allele frequency handed to PLINK's ``--maf``

    Returns ``dataset.group.markers.markers`` after the p-values from the
    PLINK output have been attached.
    """
    # Imported locally so the function does not depend on file-level imports.
    import shlex
    import subprocess

    plink_output_filename = webqtlUtil.genRandStr("%s_%s_" % (dataset.group.name, this_trait.name))

    gen_pheno_txt_file(dataset, vals)

    # Build an argument list and run PLINK without a shell: the group name
    # and maf are interpolated into the command, so a shell string would
    # allow command injection through those values.
    plink_args = shlex.split(PLINK_COMMAND) + [
        '--noweb',
        '--bfile', '%s/%s' % (flat_files('mapping'), dataset.group.name),
        '--no-fid', '--no-parents', '--no-sex',
        '--maf',
    ]
    # An empty maf used to vanish when the shell split the command line,
    # leaving a bare --maf; keep that by only adding a non-empty value.
    maf_value = ('%s' % (maf,)).strip()
    if maf_value:
        plink_args.append(maf_value)
    plink_args += [
        '--missing-phenotype', '-9',
        '--out', '%s/%s' % (TMPDIR, plink_output_filename),
        '--assoc',
    ]
    logger.debug("plink_command:", " ".join(plink_args))

    # The exit status is ignored, as it was with os.system().
    subprocess.call(plink_args)

    count, p_values = parse_plink_output(plink_output_filename, species)

    logger.debug("p_values:", p_values)
    dataset.group.markers.add_pvalues(p_values)

    return dataset.group.markers.markers
Beispiel #33
0
    def get_attributes(self):
        """Load the case attributes recorded for this dataset.

        Fills ``self.attributes`` with one ``Bunch`` per attribute id, each
        carrying the attribute ``name`` and its naturally sorted
        ``distinct_values``.
        """
        # One row per (attribute id, attribute name, distinct value)
        query = '''
                        SELECT DISTINCT CaseAttribute.Id, CaseAttribute.Name, CaseAttributeXRef.Value
                        FROM CaseAttribute, CaseAttributeXRef
                        WHERE CaseAttributeXRef.CaseAttributeId = CaseAttribute.Id
                        AND CaseAttributeXRef.ProbeSetFreezeId = %s
                        ORDER BY CaseAttribute.Name'''
        rows = g.db.execute(query, (str(self.dataset.id), )).fetchall()

        self.attributes = {}
        grouped = itertools.groupby(rows, lambda row: (row.Id, row.Name))
        for (attr_id, attr_name), attr_rows in grouped:
            logger.debug("radish: %s - %s" % (attr_id, attr_name))
            entry = Bunch()
            self.attributes[attr_id] = entry
            entry.name = attr_name
            entry.distinct_values = sorted(
                (row.Value for row in attr_rows), key=natural_sort_key)
Beispiel #34
0
def marker_regression_page():
    """Run a mapping job from the posted form and return the rendered page.

    Only the whitelisted form fields (plus every ``value:<sample>`` field)
    are passed on to ``marker_regression.MarkerRegression``.  A pair-scan
    result is rendered with ``pair_scan_results.html`` and carries the
    base64-encoded image bytes; everything else goes through
    ``marker_regression_gn1.MarkerRegression`` and
    ``marker_regression_gn1.html``.
    """
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:", initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']
    wanted = (
        'trait_id',
        'dataset',
        'method',
        'trimmed_markers',
        'selected_chr',
        'chromosomes',
        'mapping_scale',
        'score_type',
        'suggestive',
        'significant',
        'num_perm',
        'permCheck',
        'perm_output',
        'num_bootstrap',
        'bootCheck',
        'bootstrap_results',
        'LRSCheck',
        'maf',
        'manhattan_plot',
        'control_marker',
        'control_marker_db',
        'do_control',
        'genofile',
        'pair_scan',
        'startMb',
        'endMb',
        'graphWidth',
        'lrsMax',
        'additiveCheck',
        'showSNP',
        'showGenes',
        'viewLegend',
        'haplotypeAnalystCheck',
        'mapmethod_rqtl_geno',
        'mapmodel_rqtl_geno'
    )
    start_vars = {}
    for key, value in initial_start_vars.iteritems():
        if key in wanted or key.startswith('value:'):
            start_vars[key] = value
    logger.debug("Marker regression called with start_vars:", start_vars)

    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars, sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        # The cache lookup is switched off, so `result` is always None here.
        # NOTE(review): the cache-hit branch below never sets
        # `rendered_template`; fix that before re-enabling the lookup.
        result = None # Just for testing
        #result = Redis.get(key)

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(start_vars, temp_uuid)

        if template_vars.mapping_method != "gemma" and template_vars.mapping_method != "plink":
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent="   ")

        result = template_vars.__dict__

        if result['pair_scan']:
            with Bench("Rendering template"):
                img_path = result['pair_scan_filename']
                logger.info("img_path:", img_path)
                logger.info("initial_start_vars:", initial_start_vars)
                # Close the image file deterministically instead of leaking
                # the handle until garbage collection.
                with open(TEMPDIR + img_path, 'rb') as imgfile:
                    imgdata = imgfile.read()
                imgB64 = imgdata.encode("base64")
                bytesarray = array.array('B', imgB64)
                result['pair_scan_array'] = bytesarray
                rendered_template = render_template("pair_scan_results.html", **result)
        else:
            gn1_template_vars = marker_regression_gn1.MarkerRegression(result).__dict__
            #pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
            #logger.info("pickled result length:", len(pickled_result))
            #Redis.set(key, pickled_result)
            #Redis.expire(key, 1*60)

            with Bench("Rendering template"):
                if (gn1_template_vars['mapping_method'] == "gemma") or (gn1_template_vars['mapping_method'] == "plink"):
                    gn1_template_vars.pop('qtlresults', None)
                print("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
                rendered_template = render_template("marker_regression_gn1.html", **gn1_template_vars)

    return rendered_template
Beispiel #35
0
def connect_db():
    """Create a database engine on ``g`` unless one is already stored there."""
    if getattr(g, '_database', None) is not None:
        return

    logger.debug("Get new database connector")
    engine = sqlalchemy.create_engine(SQL_URI)
    # The same engine is published under both names
    g.db = engine
    g._database = engine
    logger.debug(g.db)
Beispiel #36
0
def marker_regression_page():
    """Run a mapping job from the posted form and return the rendered page.

    Only the whitelisted form fields (plus every ``value:<sample>`` field)
    are passed on to ``marker_regression.MarkerRegression``.  A pair-scan
    result is rendered with ``pair_scan_results.html`` and carries the
    base64-encoded image bytes; everything else goes through
    ``marker_regression_gn1.MarkerRegression`` and
    ``marker_regression_gn1.html``.
    """
    initial_start_vars = request.form
    logger.debug("Marker regression called with initial_start_vars:",
                 initial_start_vars.items())
    temp_uuid = initial_start_vars['temp_uuid']
    wanted = ('trait_id', 'dataset', 'method', 'trimmed_markers',
              'selected_chr', 'chromosomes', 'mapping_scale', 'score_type',
              'suggestive', 'significant', 'num_perm', 'permCheck',
              'perm_output', 'num_bootstrap', 'bootCheck', 'bootstrap_results',
              'LRSCheck', 'maf', 'manhattan_plot', 'control_marker',
              'control_marker_db', 'do_control', 'genofile', 'pair_scan',
              'startMb', 'endMb', 'graphWidth', 'lrsMax', 'additiveCheck',
              'showSNP', 'showGenes', 'viewLegend', 'haplotypeAnalystCheck',
              'mapmethod_rqtl_geno', 'mapmodel_rqtl_geno')
    start_vars = {}
    for key, value in initial_start_vars.iteritems():
        if key in wanted or key.startswith('value:'):
            start_vars[key] = value
    logger.debug("Marker regression called with start_vars:", start_vars)

    version = "v3"
    key = "marker_regression:{}:".format(version) + json.dumps(start_vars,
                                                               sort_keys=True)
    logger.info("key is:", pf(key))
    with Bench("Loading cache"):
        # The cache lookup is switched off, so `result` is always None here.
        # NOTE(review): the cache-hit branch below never sets
        # `rendered_template`; fix that before re-enabling the lookup.
        result = None  # Just for testing
        #result = Redis.get(key)

    if result:
        logger.info("Cache hit!!!")
        with Bench("Loading results"):
            result = pickle.loads(result)
    else:
        logger.info("Cache miss!!!")
        with Bench("Total time in MarkerRegression"):
            template_vars = marker_regression.MarkerRegression(
                start_vars, temp_uuid)

        if template_vars.mapping_method != "gemma" and template_vars.mapping_method != "plink":
            template_vars.js_data = json.dumps(template_vars.js_data,
                                               default=json_default_handler,
                                               indent="   ")

        result = template_vars.__dict__

        if result['pair_scan']:
            with Bench("Rendering template"):
                img_path = result['pair_scan_filename']
                logger.info("img_path:", img_path)
                logger.info("initial_start_vars:", initial_start_vars)
                # Close the image file deterministically instead of leaking
                # the handle until garbage collection.
                with open(TEMPDIR + img_path, 'rb') as imgfile:
                    imgdata = imgfile.read()
                imgB64 = imgdata.encode("base64")
                bytesarray = array.array('B', imgB64)
                result['pair_scan_array'] = bytesarray
                rendered_template = render_template("pair_scan_results.html",
                                                    **result)
        else:
            gn1_template_vars = marker_regression_gn1.MarkerRegression(
                result).__dict__
            #pickled_result = pickle.dumps(result, pickle.HIGHEST_PROTOCOL)
            #logger.info("pickled result length:", len(pickled_result))
            #Redis.set(key, pickled_result)
            #Redis.expire(key, 1*60)

            with Bench("Rendering template"):
                if (gn1_template_vars['mapping_method']
                        == "gemma") or (gn1_template_vars['mapping_method']
                                        == "plink"):
                    gn1_template_vars.pop('qtlresults', None)
                print("TEMPLATE KEYS:", list(gn1_template_vars.keys()))
                rendered_template = render_template(
                    "marker_regression_gn1.html", **gn1_template_vars)

    return rendered_template
Beispiel #37
0
def shutdown_session(exception=None):
    """Remove the db session and clear ``g.db`` if an engine was stored on ``g``.

    ``exception`` is accepted but not used.
    """
    if getattr(g, '_database', None) is None:
        return

    logger.debug("remove db_session")
    db_session.remove()
    g.db = None
Beispiel #38
0
def get_dataset_info(dataset_name, group_name=None, file_format="json"):
    """Look up public dataset metadata and return it as a JSON response.

    dataset_name -- ProbeSetFreeze id (all digits) or one of its names; when
                    ``group_name`` is given it is also used as the
                    PublishXRef id of a phenotype
    group_name   -- optional InbredSet name; enables the phenotype lookup
    file_format  -- accepted but not used by this function

    Returns a JSON list when both lookups match, a single JSON object when
    exactly one matches, and a 204 "No Results" error otherwise.
    """
    #ZS: First get ProbeSet (mRNA expression) datasets and then get Phenotype datasets

    #ZS: I figure I might as well return a list if there are multiple matches, though I don't know if this will actually happen in practice
    datasets_list = []

    probeset_query = """
                SELECT ProbeSetFreeze.Id, ProbeSetFreeze.Name, ProbeSetFreeze.FullName,
                       ProbeSetFreeze.ShortName, ProbeSetFreeze.DataScale, ProbeFreeze.TissueId,
                       Tissue.Name, ProbeSetFreeze.public, ProbeSetFreeze.confidentiality
                FROM ProbeSetFreeze, ProbeFreeze, Tissue
            """

    where_statement = """
                         WHERE ProbeSetFreeze.ProbeFreezeId = ProbeFreeze.Id AND
                               ProbeFreeze.TissueId = Tissue.Id AND
                               ProbeSetFreeze.public > 0 AND
                               ProbeSetFreeze.confidentiality < 1 AND
                      """
    # dataset_name and group_name come from the request, so they are passed
    # as bound parameters instead of being formatted into the SQL text.
    if dataset_name.isdigit():
        where_statement += """
                              ProbeSetFreeze.Id = %s
                           """
        probeset_params = (dataset_name, )
    else:
        where_statement += """
                              (ProbeSetFreeze.Name = %s OR ProbeSetFreeze.Name2 = %s OR
                              ProbeSetFreeze.FullName = %s OR ProbeSetFreeze.ShortName = %s)
                           """
        probeset_params = (dataset_name, ) * 4

    probeset_query += where_statement
    probeset_results = g.db.execute(probeset_query, probeset_params)
    dataset = probeset_results.fetchone()

    if dataset:
        dataset_dict = {
            "dataset_type": "mRNA expression",
            "id": dataset[0],
            "name": dataset[1],
            "full_name": dataset[2],
            "short_name": dataset[3],
            "data_scale": dataset[4],
            "tissue_id": dataset[5],
            "tissue": dataset[6],
            "public": dataset[7],
            "confidential": dataset[8]
        }

        datasets_list.append(dataset_dict)

    if group_name:
        pheno_query = """
                         SELECT PublishXRef.Id, Phenotype.Post_publication_abbreviation, Phenotype.Post_publication_description,
                                Phenotype.Pre_publication_abbreviation, Phenotype.Pre_publication_description,
                                Publication.PubMed_ID, Publication.Title, Publication.Year
                         FROM PublishXRef, Phenotype, Publication, InbredSet, PublishFreeze
                         WHERE PublishXRef.InbredSetId = InbredSet.Id AND
                               PublishXRef.PhenotypeId = Phenotype.Id AND
                               PublishXRef.PublicationId = Publication.Id AND
                               PublishFreeze.InbredSetId = InbredSet.Id AND
                               PublishFreeze.public > 0 AND
                               PublishFreeze.confidentiality < 1 AND
                               InbredSet.Name = %s AND PublishXRef.Id = %s
                      """

        logger.debug("QUERY:", pheno_query)

        pheno_results = g.db.execute(pheno_query, (group_name, dataset_name))
        dataset = pheno_results.fetchone()

        if dataset:
            if dataset[5]:
                # Published phenotype: use the post-publication fields
                dataset_dict = {
                    "dataset_type": "phenotype",
                    "id": dataset[0],
                    "name": dataset[1],
                    "description": dataset[2],
                    "pubmed_id": dataset[5],
                    "title": dataset[6],
                    "year": dataset[7]
                }
            elif dataset[4]:
                # No PubMed id: fall back to the pre-publication fields
                dataset_dict = {
                    "dataset_type": "phenotype",
                    "id": dataset[0],
                    "name": dataset[3],
                    "description": dataset[4]
                }
            else:
                dataset_dict = {"dataset_type": "phenotype", "id": dataset[0]}

            datasets_list.append(dataset_dict)

    if len(datasets_list) > 1:
        return flask.jsonify(datasets_list)
    elif len(datasets_list) == 1:
        return flask.jsonify(datasets_list[0])
    else:
        return return_error(code=204,
                            source=request.url_rule.rule,
                            title="No Results",
                            details="")
Beispiel #39
0
def connect_db():
    """Create a database engine on ``g`` unless one is already stored there."""
    if getattr(g, '_database', None) is not None:
        return

    logger.debug("Get new database connector")
    engine = sqlalchemy.create_engine(SQL_URI)
    # The same engine is published under both names
    g.db = engine
    g._database = engine
    logger.debug(g.db)
Beispiel #40
0
def shutdown_session(exception=None):
    """Remove the db session and clear ``g.db`` if an engine was stored on ``g``.

    ``exception`` is accepted but not used.
    """
    if getattr(g, '_database', None) is None:
        return

    logger.debug("remove db_session")
    db_session.remove()
    g.db = None
Beispiel #41
0
    def __init__(self, start_vars, temp_uuid):
        """Run the requested mapping method and prepare its results for display.

        start_vars -- mapping of form values.  'method', 'manhattan_plot' and
                      'maf' are always read; per-sample values are read from
                      the 'value:<sample name>' keys; the remaining keys
                      depend on the mapping method and on whether 'startMb'
                      is present (it is absent on the first page load).
        temp_uuid  -- stored on the instance and passed, as a string, to the
                      pylmm code path.

        Sets, among others, ``samples``/``vals``, ``qtl_results``,
        ``trimmed_markers`` and (except for gemma single scans) ``js_data``.
        """
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        # NOTE(review): the return value is not used; the call is kept only
        # in case it has side effects -- confirm and remove.
        self.dataset.group.all_samples_ordered()

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' +
                                       self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                # Samples without trait data count only if a value was given
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = start_vars['manhattan_plot'] == "True"

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            selected_chr = int(start_vars['selected_chr'])
            #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
            if selected_chr != -1:
                selected_chr += 1
            self.selected_chr = selected_chr

        # Optional display settings are copied over only when supplied
        for name in ("startMb", "endMb", "graphWidth", "lrsMax",
                     "haplotypeAnalystCheck"):
            if name in start_vars:
                setattr(self, name, start_vars[name])

        if "startMb" in start_vars:  #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            self.showSNP = start_vars.get('showSNP', False)
            self.showGenes = start_vars.get('showGenes', False)
            self.viewLegend = start_vars.get('viewLegend', False)
        else:
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except (KeyError, TypeError, ValueError):
                # Missing or non-numeric num_perm means "no permutations"
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset,
                                                     self.samples, self.vals)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            rqtl_output = rqtl_mapping.run_rqtl_geno(
                self.vals, self.dataset, self.method, self.model,
                self.permCheck, self.num_perm, self.do_control,
                self.control_marker, self.manhattan_plot, self.pair_scan)
            if self.permCheck and self.num_perm > 0:
                # With permutations the call returns a 4-tuple
                self.perm_output, self.suggestive, self.significant, results = rqtl_output
            else:
                results = rqtl_output
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                self.additiveCheck = start_vars.get('additiveCheck', False)

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except (KeyError, TypeError, ValueError):
                    # Missing or non-numeric num_bootstrap means "no bootstrap"
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            logger.info("Running qtlreaper")
            results, self.json_data, self.perm_output, self.suggestive, self.significant, self.bootstrap_results = qtlreaper_mapping.gen_reaper_results(
                self.this_trait, self.dataset, self.samples, self.json_data,
                self.num_perm, self.bootCheck, self.num_bootstrap,
                self.do_control, self.control_marker, self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            self.dataset.group.genofile = start_vars['genofile']
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            # NOTE(review): `results` stays unbound on this path, so the code
            # below fails with a NameError for an unknown mapping method.
            logger.debug("RUNNING NOTHING")

        if self.pair_scan:
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker[
                        'chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker[
                            'chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            # The lists must exist before they are appended to; they were
            # never created, so the first append raised a KeyError.
            for list_name in ('chr1', 'chr2', 'Mb', 'markernames'):
                self.json_data.setdefault(list_name, [])
            # Iterate the markers themselves: iterating enumerate() yielded
            # (index, marker) tuples, which cannot be indexed by key.
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  #This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker[
                        'chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker[
                            'chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value'
                                                          in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                #Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for qtl in self.qtl_results:
                    # A numeric highest chromosome is reported as "X"
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                #Get chromosome lengths for drawing the interval map plot
                chromosomes = self.species.chromosomes.chromosomes
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in chromosomes.keys():
                    self.json_data['chrnames'].append(
                        [chromosomes[key].name, chromosomes[key].mb_length])
                    chromosome_mb_lengths[key] = chromosomes[key].mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
Beispiel #42
0
def get_bnw_input(start_vars):
    """Log the incoming ``start_vars`` at debug level."""
    debug_label = "BNW VARS:"
    logger.debug(debug_label, start_vars)
Beispiel #43
0
def get_bnw_input(start_vars):
    """Log the incoming ``start_vars`` at debug level."""
    debug_label = "BNW VARS:"
    logger.debug(debug_label, start_vars)
    def __init__(self, start_vars, temp_uuid):
        """Run the mapping method named in ``start_vars`` and stage its results.

        Reads the request parameters, assembles the sample/value lists,
        dispatches to the selected mapping backend (``gemma``, ``rqtl_plink``,
        ``rqtl_geno``, ``reaper`` or ``plink``) and stores the resulting
        markers plus the ``js_data`` payload on the instance.

        Args:
            start_vars: Mapping of request parameters.  ``sample_vals`` (a JSON
                object string), ``method`` and ``maf`` are always read; the
                other keys are optional or only read for specific methods.
            temp_uuid: Stored on ``self.temp_uuid`` for the GN1 mapping code.

        When the backend yields no results, or ``method`` names no known
        backend, ``self.no_results`` is set to True and nothing further is
        computed.
        """
        # Presumably populates self.species, self.dataset and self.this_trait,
        # all of which are read below -- TODO confirm against the helper.
        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  #needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        #ZS: Needed to zoom in or remap temp traits like PCA traits
        if "temp_trait" in start_vars and start_vars['temp_trait'] != "False":
            self.temp_trait = "True"
            self.group = self.dataset.group.name

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        #ZS: Sometimes a group may have a genofile that only includes a subset of samples
        genofile_samplelist = []
        if 'genofile' in start_vars and start_vars['genofile'] != "":
            self.genofile_string = start_vars['genofile']
            self.dataset.group.genofile = self.genofile_string.split(":")[0]
            genofile_samplelist = get_genofile_samplelist(self.dataset)

        # The return value is not used here; the call is retained in case it
        # has side effects on the group -- TODO confirm and drop if it has none.
        self.dataset.group.all_samples_ordered()

        self.vals = []
        self.samples = []
        self.sample_vals = start_vars['sample_vals']
        sample_val_dict = json.loads(self.sample_vals)
        if genofile_samplelist:
            # Keep every genofile sample; "x" marks a sample without a value.
            for sample in genofile_samplelist:
                self.samples.append(sample)
                self.vals.append(sample_val_dict.get(sample, "x"))
        else:
            # Without a genofile subset, only samples that have a value are kept.
            for sample in self.dataset.group.samplelist:
                if sample in sample_val_dict:
                    self.vals.append(sample_val_dict[sample])
                    self.samples.append(sample)

        if 'n_samples' in start_vars:
            self.n_samples = start_vars['n_samples']
        else:
            self.n_samples = sum(1 for val in self.vals if val != "x")

        #ZS: Check if genotypes exist in the DB in order to create links for markers
        self.geno_db_exists = geno_db_exists(self.dataset)

        self.mapping_method = start_vars['method']
        if "results_path" in start_vars:
            self.mapping_results_path = start_vars['results_path']
        else:
            # Group name plus six random characters gives a fresh file name.
            mapping_results_filename = self.dataset.group.name + "_" + ''.join(
                random.choice(string.ascii_uppercase + string.digits)
                for _ in range(6))
            self.mapping_results_path = "{}{}.csv".format(
                webqtlConfig.GENERATED_IMAGE_DIR, mapping_results_filename)

        self.manhattan_plot = False
        if ('manhattan_plot' in start_vars
                and start_vars['manhattan_plot'].lower() != "false"):
            self.color_scheme = "alternating"
            if "color_scheme" in start_vars:
                self.color_scheme = start_vars['color_scheme']
                if self.color_scheme == "single":
                    self.manhattan_single_color = start_vars[
                        'manhattan_single_color']
            self.manhattan_plot = True

        self.maf = start_vars['maf']  # Minor allele frequency
        self.use_loco = start_vars[
            'use_loco'] if "use_loco" in start_vars else None
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.transform = start_vars[
            'transform'] if 'transform' in start_vars else ""
        self.score_type = "LRS"  #ZS: LRS or LOD
        self.mapping_scale = "physic"
        if "mapping_scale" in start_vars:
            self.mapping_scale = start_vars['mapping_scale']
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []
        self.covariates = start_vars[
            'covariates'] if "covariates" in start_vars else ""
        self.categorical_vars = []

        #ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            #ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
            self.selected_chr = int(start_vars['selected_chr'])
            if self.selected_chr != -1:
                self.selected_chr += 1
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']

        #ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
        if "startMb" in start_vars:
            self.permCheck = "ON" if "permCheck" in start_vars else False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            self.showSNP = start_vars[
                'showSNP'] if "showSNP" in start_vars else False
            self.showGenes = start_vars[
                'showGenes'] if "showGenes" in start_vars else False
            self.viewLegend = start_vars[
                'viewLegend'] if "viewLegend" in start_vars else False
        else:
            # A missing or non-numeric num_perm means "no permutations".
            try:
                requested_perms = int(start_vars['num_perm'])
            except (KeyError, ValueError, TypeError):
                requested_perms = 0
            if requested_perms > 0:
                self.num_perm = requested_perms

            self.permCheck = "ON" if self.num_perm > 0 else False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        # Default for an unrecognised mapping method, so the no-results path
        # below is taken instead of hitting an unbound local.
        results = []

        if self.mapping_method == "gemma":
            self.first_run = True
            self.output_files = None
            if 'output_files' in start_vars:
                self.output_files = start_vars['output_files']
            if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                self.first_run = False
            self.score_type = "-logP"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                # use_loco is passed straight through; the call is the same
                # whether or not LOCO is enabled.
                results, self.output_files = gemma_mapping.run_gemma(
                    self.this_trait, self.dataset, self.samples, self.vals,
                    self.covariates, self.use_loco, self.maf,
                    self.first_run, self.output_files)
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            perm_strata = []
            if "perm_strata" in start_vars and "categorical_vars" in start_vars:
                self.categorical_vars = start_vars["categorical_vars"].split(
                    ",")
                if self.categorical_vars and start_vars["perm_strata"] == "True":
                    primary_samples = SampleList(dataset=self.dataset,
                                                 sample_names=self.samples,
                                                 this_trait=self.this_trait)

                    perm_strata = get_perm_strata(self.this_trait,
                                                  primary_samples,
                                                  self.categorical_vars,
                                                  self.samples)
            self.score_type = "LOD"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            if 'mapmethod_rqtl_geno' in start_vars:
                self.method = start_vars['mapmethod_rqtl_geno']
            else:
                self.method = "em"
            self.model = start_vars['mapmodel_rqtl_geno']

            rqtl_output = rqtl_mapping.run_rqtl_geno(
                self.vals, self.samples, self.dataset, self.mapping_scale,
                self.method, self.model, self.permCheck, self.num_perm,
                perm_strata, self.do_control, self.control_marker,
                self.manhattan_plot, self.pair_scan, self.covariates)
            if self.permCheck and self.num_perm > 0:
                # With permutations the return value is unpacked as a 4-tuple.
                (self.perm_output, self.suggestive, self.significant,
                 results) = rqtl_output
            else:
                results = rqtl_output
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  #ZS: Check if first time page loaded, so it can default to ON
                self.additiveCheck = start_vars[
                    'additiveCheck'] if "additiveCheck" in start_vars else False
                self.bootCheck = "ON" if "bootCheck" in start_vars else False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                # A missing or non-numeric num_bootstrap disables bootstrapping.
                try:
                    requested_bootstraps = int(start_vars['num_bootstrap'])
                except (KeyError, ValueError, TypeError):
                    requested_bootstraps = 0
                if requested_bootstraps > 0:
                    self.bootCheck = "ON"
                    self.num_bootstrap = requested_bootstraps
                else:
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.reaper_version = start_vars['reaper_version']

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            logger.info("Running qtlreaper")

            if self.reaper_version == "new":
                self.first_run = True
                self.output_files = None
                if 'first_run' in start_vars:  #ZS: check if first run so existing result files can be used if it isn't (for example zooming on a chromosome, etc)
                    self.first_run = False
                    if 'output_files' in start_vars:
                        self.output_files = start_vars['output_files'].split(
                            ",")

                (results, self.perm_output, self.suggestive, self.significant,
                 self.bootstrap_results,
                 self.output_files) = qtlreaper_mapping.run_reaper(
                     self.this_trait, self.dataset, self.samples, self.vals,
                     self.json_data, self.num_perm, self.bootCheck,
                     self.num_bootstrap, self.do_control, self.control_marker,
                     self.manhattan_plot, self.first_run, self.output_files)
            else:
                (results, self.json_data, self.perm_output, self.suggestive,
                 self.significant,
                 self.bootstrap_results) = qtlreaper_mapping.run_original_reaper(
                     self.this_trait, self.dataset, self.samples, self.vals,
                     self.json_data, self.num_perm, self.bootCheck,
                     self.num_bootstrap, self.do_control, self.control_marker,
                     self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-logP"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset,
                                              self.species, self.vals,
                                              self.maf)
        else:
            logger.debug("RUNNING NOTHING")

        self.no_results = len(results) == 0
        if self.no_results:
            return

        if self.pair_scan:
            self.qtl_results = []
            for marker in results:
                chr1 = marker['chr1']
                # Test the sex-chromosome names first so a string is never
                # compared with an integer.
                if (chr1 == "X" or chr1 == "X/Y" or chr1 > 0) \
                        and 'lod_score' in marker:
                    self.qtl_results.append(marker)

            self.trimmed_markers = results

            for qtl in self.qtl_results:
                # json_data only starts with 'lodnames', so create the lists
                # on first use.
                self.json_data.setdefault('chr1', []).append(str(qtl['chr1']))
                self.json_data.setdefault('chr2', []).append(str(qtl['chr2']))
                self.json_data.setdefault('Mb', []).append(qtl['Mb'])
                self.json_data.setdefault('markernames',
                                          []).append(qtl['name'])

            self.js_data = dict(json_data=self.json_data,
                                this_trait=self.this_trait.name,
                                data_set=self.dataset.name,
                                maf=self.maf,
                                manhattan_plot=self.manhattan_plot,
                                mapping_scale=self.mapping_scale,
                                qtl_results=self.qtl_results)
            return

        self.qtl_results = []
        self.results_for_browser = []
        self.annotations_for_browser = []
        for marker in results:
            if 'Mb' in marker:
                this_ps = marker['Mb'] * 1000000
            else:
                this_ps = marker['cM'] * 1000000

            chr_name = str(marker['chr'])
            marker_url = ("/show_trait?trait_id=" + marker['name'] +
                          "&dataset=" + self.dataset.group.name + "Geno")

            browser_marker = dict(chr=chr_name,
                                  rs=marker['name'],
                                  ps=this_ps,
                                  url=marker_url)

            annot_marker = dict(name=str(marker['name']),
                                chr=chr_name,
                                rs=marker['name'],
                                pos=this_ps)
            # Only link annotations to a trait page when a Geno DB exists.
            if self.geno_db_exists == "True":
                annot_marker['url'] = marker_url

            # An LRS divided by 4.61 is treated like a LOD score here.
            if 'lrs_value' in marker and marker['lrs_value'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lrs_value'] / 4.61)
            elif 'lod_score' in marker and marker['lod_score'] > 0:
                browser_marker['p_wald'] = 10**-(marker['lod_score'])
            else:
                browser_marker['p_wald'] = 0

            self.results_for_browser.append(browser_marker)
            self.annotations_for_browser.append(annot_marker)

            # String comparison: keeps every chromosome name that sorts
            # after '0', which includes "X" and "X/Y".
            if chr_name > '0' or chr_name == "X" or chr_name == "X/Y":
                if 'lod_score' in marker or 'lrs_value' in marker:
                    if 'Mb' in marker:
                        marker['display_pos'] = "Chr{}: {:.6f}".format(
                            chr_name, marker['Mb'])
                    elif 'cM' in marker:
                        marker['display_pos'] = "Chr{}: {:.3f}".format(
                            chr_name, marker['cM'])
                    else:
                        marker['display_pos'] = "N/A"
                    self.qtl_results.append(marker)

        # Counted before trimming so the total reflects every kept marker.
        total_markers = len(self.qtl_results)

        with Bench("Exporting Results"):
            export_mapping_results(self.dataset, self.this_trait,
                                   self.qtl_results,
                                   self.mapping_results_path,
                                   self.mapping_scale, self.score_type)

        with Bench("Trimming Markers for Figure"):
            if len(self.qtl_results) > 30000:
                self.qtl_results = trim_markers_for_figure(self.qtl_results)
                self.results_for_browser = trim_markers_for_figure(
                    self.results_for_browser)
                # Keep the first annotation for each marker that survived
                # trimming, in the order of the trimmed markers.
                annotation_by_rs = {}
                for annot_marker in self.annotations_for_browser:
                    annotation_by_rs.setdefault(annot_marker['rs'],
                                                annot_marker)
                self.annotations_for_browser = [
                    annotation_by_rs[marker['rs']]
                    for marker in self.results_for_browser
                    if marker['rs'] in annotation_by_rs
                ]
            browser_files = write_input_for_browser(
                self.dataset, self.results_for_browser,
                self.annotations_for_browser)

        with Bench("Trimming Markers for Table"):
            self.trimmed_markers = trim_markers_for_table(results)

        chr_lengths = get_chr_lengths(self.mapping_scale,
                                      self.mapping_method,
                                      self.dataset, self.qtl_results)

        #ZS: For zooming into genome browser, need to pass chromosome name instead of number
        sex_chr_names = {
            "mouse": {20: "X"},
            "rat": {21: "X"},
        }.get(self.dataset.group.species, {22: "X", 23: "Y"})
        this_chr = sex_chr_names.get(self.selected_chr,
                                     str(self.selected_chr))

        if self.mapping_method != "gemma":
            # self.significant is still the "" placeholder when the backend
            # did not supply a threshold; only rescale a real number.
            if self.score_type == "LRS" and not isinstance(
                    self.significant, str):
                significant_for_browser = self.significant / 4.61
            else:
                significant_for_browser = self.significant

            self.js_data = dict(categorical_vars=self.categorical_vars,
                                chr_lengths=chr_lengths,
                                num_perm=self.num_perm,
                                perm_results=self.perm_output,
                                significant=significant_for_browser,
                                browser_files=browser_files,
                                selected_chr=this_chr,
                                total_markers=total_markers)
        else:
            self.js_data = dict(chr_lengths=chr_lengths,
                                browser_files=browser_files,
                                selected_chr=this_chr,
                                total_markers=total_markers)
    def run_analysis(self, requestform):
        """Run a CTL scan for the traits listed in ``requestform``.

        Reads ``trait_list``, ``strategy``, ``nperm``, ``parametric`` and
        ``significance`` from ``requestform``, builds R genotype and phenotype
        data frames, runs the scan, renders the plots to PNG files and fills
        ``self.results`` and ``self.elements``.
        """
        logger.info("Starting CTL analysis on dataset")
        stripped_names = (name.strip()
                          for name in requestform['trait_list'].split(','))
        self.trait_db_list = [name for name in stripped_names if name]

        strategy = requestform.get("strategy")
        logger.debug("strategy:", strategy)

        raw_nperm = requestform.get("nperm")
        logger.debug("nperm:", raw_nperm)
        nperm = int(raw_nperm)

        # NOTE(review): bool() of any non-empty string is True, including
        # "False" -- confirm what the form actually submits here.
        raw_parametric = requestform.get("parametric")
        logger.debug("parametric:", raw_parametric)
        parametric = bool(raw_parametric)

        raw_significance = requestform.get("significance")
        logger.debug("significance:", raw_significance)
        significance = float(raw_significance)

        # The .geno file is taken from the dataset of the first phenotype
        first_dataset_name = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(first_dataset_name)

        geno_path = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(geno_path)
        parser.process_csv()
        logger.debug("dataset group: ", dataset.group)

        # Genotype matrix: one row per marker, one column per individual,
        # transposed afterwards by r_t
        individuals = parser.individuals
        markernames = [entry["name"] for entry in parser.markers]
        markers = [entry["genotypes"] for entry in parser.markers]

        genotypes = list(itertools.chain.from_iterable(markers))
        logger.debug(len(genotypes) / len(individuals), "==", len(parser.markers))

        geno_matrix = ro.r.matrix(r_unlist(genotypes),
                                  nrow=len(markernames),
                                  ncol=len(individuals),
                                  dimnames=r_list(markernames, individuals),
                                  byrow=True)
        rGeno = r_t(geno_matrix)

        # Phenotype matrix: one row per trait, "-999" where an individual
        # has no value
        traits = []
        for trait in self.trait_db_list:
            logger.debug("retrieving data for", trait)
            if trait == "":
                continue
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name=ts[0], dataset_name=ts[1])
            gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
            traits.extend(
                gt.data[ind].value if ind in gt.data else "-999"
                for ind in individuals)

        pheno_matrix = ro.r.matrix(r_as_numeric(r_unlist(traits)),
                                   nrow=len(self.trait_db_list),
                                   ncol=len(individuals),
                                   dimnames=r_list(self.trait_db_list, individuals),
                                   byrow=True)
        rPheno = r_t(pheno_matrix)

        logger.debug(rPheno)

        # Wrap both matrices in data frames for the scan
        rPheno = r_data_frame(rPheno, check_names=False)
        rGeno = r_data_frame(rGeno, check_names=False)

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy=strategy, nperm=nperm,
                             parametric=parametric, ncores=6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance=significance)

        # First output image: the line plot
        line_plot_name = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results = {}
        self.results['imgurl1'] = line_plot_name
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        # Keep the user specified parameters for the output page
        self.results['requestform'] = requestform

        r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
        self.r_lineplot(res, significance=significance)
        r_dev_off()

        # Image keys continue from 2 (key 1 is the line plot); the trait's
        # 1-based position, n - 1, is what gets passed to the plot call.
        for n, trait in enumerate(self.trait_db_list, start=2):
            url_key = 'imgurl' + str(n)
            loc_key = 'imgloc' + str(n)
            self.results[url_key] = webqtlUtil.genRandStr("CTL_") + ".png"
            self.results[loc_key] = GENERATED_IMAGE_DIR + self.results[url_key]
            r_png(self.results[loc_key], width=1000, height=600, type='cairo-png')
            self.r_plotCTLobject(res, (n - 1), significance=significance,
                                 main='Phenotype ' + trait)
            r_dev_off()

        # Flush any output from R
        sys.stdout.flush()

        # Build the nodes and edges for the cytoscape visualization
        if type(significant) != ri.RNULLType:
            for x in range(len(significant[0])):
                logger.debug(significant[0][x], significant[1][x], significant[2][x])
                tsS = significant[0][x].split(':')  # Source
                tsT = significant[2][x].split(':')  # Target
                gtS = TRAIT.GeneralTrait(name=tsS[0], dataset_name=tsS[1])
                gtT = TRAIT.GeneralTrait(name=tsT[0], dataset_name=tsT[1])
                self.addNode(gtS)
                self.addNode(gtT)
                self.addEdge(gtS, gtT, significant, x)

                # Replace the trait ids with "symbol (name)" for the displayed table
                significant[0][x] = gtS.symbol + " (" + gtS.name + ")"
                significant[2][x] = gtT.symbol + " (" + gtT.name + ")"

        self.elements = json.dumps(self.nodes_list + self.edges_list)
    def gen_data(self, temp_uuid):
        """Compute a p-value for every marker and attach them to the markers.

        Human datasets are delegated to ``self.gen_human_results``.  For any
        other species the phenotype vector and genotype matrix are serialised
        to JSON and stored in Redis under ``pylmm:input:<temp_uuid>``, the
        ``PYLMM_COMMAND`` command is run through ``shell`` and the result is
        popped from the ``pylmm:results:<temp_uuid>`` list.

        Returns ``self.dataset.group.markers.markers`` after ``add_pvalues``
        has been called with the computed p-values.
        """
        logger.debug("self.vals is:", self.vals)
        # "x" and "" stand for missing values and become NaN
        pheno_vector = np.array([
            np.nan if (val == "x" or val == "") else float(val)
            for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        logger.debug("key is:", pf(key))

        if self.dataset.group.species == "human":
            p_values, t_stats = self.gen_human_results(pheno_vector, key, temp_uuid)
        else:
            logger.debug("NOW CWD IS:", os.getcwd())
            genotype_data = [
                marker['genotypes']
                for marker in self.dataset.group.markers.markers
            ]

            # NOTE(review): the trimmed genotypes are computed here, but the
            # matrix below is built from the untrimmed data -- confirm intent.
            no_val_samples = self.identify_empty_samples()
            trimmed_genotype_data = self.trim_genotypes(genotype_data, no_val_samples)

            genotype_matrix = np.array(genotype_data).T

            params = {
                "pheno_vector": pheno_vector.tolist(),
                "genotype_matrix": genotype_matrix.tolist(),
                "restricted_max_likelihood": True,
                "refit": False,
                "temp_uuid": temp_uuid,
                # meta data
                "timestamp": datetime.datetime.now().isoformat(),
            }

            json_params = json.dumps(params)
            # The input expires after one hour
            Redis.set(key, json_params)
            Redis.expire(key, 60*60)
            logger.debug("before printing command")

            cli_args = ' --key {} --species {}'.format(key, "other")
            command = PYLMM_COMMAND + cli_args
            logger.debug("command is:", command)
            logger.debug("after printing command")

            shell(command)

            # Block for up to 45 minutes waiting for the results entry
            results_key = "pylmm:results:" + temp_uuid
            json_results = Redis.blpop(results_key, 45*60)
            results = json.loads(json_results[1])
            p_values = [float(result) for result in results['p_values']]
            # NOTE(review): t_stats is read but not used in this method
            t_stats = results['t_stats']

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers
Beispiel #47
0
    def gen_data(self, temp_uuid):
        """Compute a p-value for every marker and attach them to the marker set.

        The values in ``self.vals`` are converted to a float vector in which
        the placeholders "x" and "" become NaN.  Human datasets are delegated
        to ``self.gen_human_results``.  For any other species the phenotype
        vector and genotype matrix are serialised to JSON and stored in Redis
        under ``"pylmm:input:" + temp_uuid``, the command built from
        ``PYLMM_COMMAND`` is run through ``shell``, and the answer is popped
        from the Redis list ``"pylmm:results:" + temp_uuid``.

        Returns ``self.dataset.group.markers.markers`` after ``add_pvalues``
        has been called with the freshly computed p-values.
        """
        logger.debug("self.vals is:", self.vals)

        # "x" and the empty string mark a missing phenotype value.
        pheno_vector = np.array([
            np.nan if (val == "x" or val == "") else float(val)
            for val in self.vals
        ])

        key = "pylmm:input:" + temp_uuid
        logger.debug("key is:", pf(key))

        if self.dataset.group.species == "human":
            p_values, t_stats = self.gen_human_results(
                pheno_vector, key, temp_uuid)
        else:
            logger.debug("NOW CWD IS:", os.getcwd())

            genotype_data = []
            for marker in self.dataset.group.markers.markers:
                genotype_data.append(marker['genotypes'])

            # NOTE(review): the value returned by trim_genotypes is never
            # used; the matrix below is built from genotype_data itself.
            # Confirm whether the trimmed data was meant to be used instead.
            no_val_samples = self.identify_empty_samples()
            self.trim_genotypes(genotype_data, no_val_samples)

            genotype_matrix = np.array(genotype_data).T

            params = {
                "pheno_vector": pheno_vector.tolist(),
                "genotype_matrix": genotype_matrix.tolist(),
                "restricted_max_likelihood": True,
                "refit": False,
                "temp_uuid": temp_uuid,
                # meta data
                "timestamp": datetime.datetime.now().isoformat(),
            }

            # Hand the input over through Redis; the entry gets a 60*60 expiry.
            Redis.set(key, json.dumps(params))
            Redis.expire(key, 60 * 60)
            logger.debug("before printing command")

            command = PYLMM_COMMAND + ' --key {} --species {}'.format(key, "other")
            logger.debug("command is:", command)
            logger.debug("after printing command")

            shell(command)

            # Wait for the result list entry, passing a timeout of 45*60.
            # NOTE(review): the popped value is indexed without a None check;
            # confirm what blpop returns when the timeout elapses.
            json_results = Redis.blpop("pylmm:results:" + temp_uuid, 45 * 60)
            results = json.loads(json_results[1])
            p_values = list(map(float, results['p_values']))
            t_stats = results['t_stats']

        self.dataset.group.markers.add_pvalues(p_values)

        return self.dataset.group.markers.markers
    def __init__(self, start_vars, temp_uuid):
        """Run the mapping method chosen in ``start_vars`` and prepare output.

        ``start_vars`` is the mapping of submitted values.  It must contain
        ``method``, ``manhattan_plot`` and ``maf``; it also supplies the
        per-sample ``value:<sample>`` entries and the optional display
        settings read below.  ``temp_uuid`` is stored on the instance and is
        passed (as a string) to the pylmm code path.

        After construction the instance carries ``qtl_results`` and
        ``trimmed_markers``; ``js_data`` is set for pair scans and for every
        non-pair-scan method other than "gemma".
        """

        helper_functions.get_species_dataset_trait(self, start_vars)

        self.temp_uuid = temp_uuid  # needed to pass temp_uuid to gn1 mapping code (marker_regression_gn1.py)

        self.json_data = {}
        self.json_data['lodnames'] = ['lod.hk']

        self.samples = []  # Want only ones with values
        self.vals = []

        # NOTE(review): neither of these two names is used again in this
        # method; kept in case all_samples_ordered() has side effects.
        all_samples_ordered = self.dataset.group.all_samples_ordered()
        primary_sample_names = list(all_samples_ordered)

        for sample in self.dataset.group.samplelist:
            # sample is actually the name of an individual
            in_trait_data = False
            for item in self.this_trait.data:
                if self.this_trait.data[item].name == sample:
                    value = start_vars['value:' + self.this_trait.data[item].name]
                    self.samples.append(self.this_trait.data[item].name)
                    self.vals.append(value)
                    in_trait_data = True
                    break
            if not in_trait_data:
                # Samples without stored trait data are only kept when the
                # submitted values contain a non-empty entry for them.
                value = start_vars.get('value:' + sample)
                if value:
                    self.samples.append(sample)
                    self.vals.append(value)

        self.mapping_method = start_vars['method']
        self.manhattan_plot = (start_vars['manhattan_plot'] == "True")

        self.maf = start_vars['maf']  # Minor allele frequency
        self.suggestive = ""
        self.significant = ""
        self.pair_scan = False  # Initializing this since it is checked in views to determine which template to use
        self.score_type = "LRS"  # ZS: LRS or LOD
        self.mapping_scale = "physic"
        self.num_perm = 0
        self.perm_output = []
        self.bootstrap_results = []

        # ZS: This is passed to GN1 code for single chr mapping
        self.selected_chr = -1
        if "selected_chr" in start_vars:
            if int(start_vars['selected_chr']) != -1:  # ZS: Needs to be -1 if showing full map; there's probably a better way to fix this
                self.selected_chr = int(start_vars['selected_chr']) + 1
            else:
                self.selected_chr = int(start_vars['selected_chr'])
        if "startMb" in start_vars:
            self.startMb = start_vars['startMb']
        if "endMb" in start_vars:
            self.endMb = start_vars['endMb']
        if "graphWidth" in start_vars:
            self.graphWidth = start_vars['graphWidth']
        if "lrsMax" in start_vars:
            self.lrsMax = start_vars['lrsMax']
        if "haplotypeAnalystCheck" in start_vars:
            self.haplotypeAnalystCheck = start_vars['haplotypeAnalystCheck']
        if "startMb" in start_vars:  # ZS: This is to ensure showGenes, Legend, etc are checked the first time you open the mapping page, since startMb will only not be set during the first load
            if "permCheck" in start_vars:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.num_perm = int(start_vars['num_perm'])

            self.LRSCheck = start_vars['LRSCheck']

            if "showSNP" in start_vars:
                self.showSNP = start_vars['showSNP']
            else:
                self.showSNP = False

            if "showGenes" in start_vars:
                self.showGenes = start_vars['showGenes']
            else:
                self.showGenes = False

            if "viewLegend" in start_vars:
                self.viewLegend = start_vars['viewLegend']
            else:
                self.viewLegend = False
        else:
            # First load: a missing or non-numeric num_perm means "no
            # permutations".  Only lookup/conversion errors are swallowed so
            # that KeyboardInterrupt and similar still propagate.
            try:
                if int(start_vars['num_perm']) > 0:
                    self.num_perm = int(start_vars['num_perm'])
            except (KeyError, TypeError, ValueError):
                self.num_perm = 0

            if self.num_perm > 0:
                self.permCheck = "ON"
            else:
                self.permCheck = False
            self.showSNP = "ON"
            self.showGenes = "ON"
            self.viewLegend = "ON"

        self.dataset.group.get_markers()
        # NOTE(review): when no branch below matches ("RUNNING NOTHING"),
        # `results` stays unbound and the code after the dispatch raises
        # NameError.  Left as is because the intended fallback is not visible
        # from here.
        if self.mapping_method == "gemma":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            with Bench("Running GEMMA"):
                marker_obs = gemma_mapping.run_gemma(self.dataset, self.samples, self.vals)
            results = marker_obs
        elif self.mapping_method == "rqtl_plink":
            results = self.run_rqtl_plink()
        elif self.mapping_method == "rqtl_geno":
            self.score_type = "LOD"
            self.mapping_scale = "morgan"
            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            self.method = start_vars['mapmethod_rqtl_geno']
            self.model = start_vars['mapmodel_rqtl_geno']
            if start_vars['pair_scan'] == "true":
                self.pair_scan = True
            # With permutations the call is unpacked into four values,
            # otherwise its return value is used as the results directly.
            if self.permCheck and self.num_perm > 0:
                self.perm_output, self.suggestive, self.significant, results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
            else:
                results = rqtl_mapping.run_rqtl_geno(
                    self.vals, self.dataset, self.method, self.model,
                    self.permCheck, self.num_perm, self.do_control,
                    self.control_marker, self.manhattan_plot, self.pair_scan)
        elif self.mapping_method == "reaper":
            if "startMb" in start_vars:  # ZS: Check if first time page loaded, so it can default to ON
                if "additiveCheck" in start_vars:
                    self.additiveCheck = start_vars['additiveCheck']
                else:
                    self.additiveCheck = False

                if "bootCheck" in start_vars:
                    self.bootCheck = "ON"
                else:
                    self.bootCheck = False
                self.num_bootstrap = int(start_vars['num_bootstrap'])
            else:
                self.additiveCheck = "ON"
                # Same best-effort parsing as for num_perm above.
                try:
                    if int(start_vars['num_bootstrap']) > 0:
                        self.bootCheck = "ON"
                        self.num_bootstrap = int(start_vars['num_bootstrap'])
                    else:
                        self.bootCheck = False
                        self.num_bootstrap = 0
                except (KeyError, TypeError, ValueError):
                    self.bootCheck = False
                    self.num_bootstrap = 0

            self.control_marker = start_vars['control_marker']
            self.do_control = start_vars['do_control']
            self.dataset.group.genofile = start_vars['genofile']
            logger.info("Running qtlreaper")
            (results, self.json_data, self.perm_output, self.suggestive,
             self.significant, self.bootstrap_results) = qtlreaper_mapping.gen_reaper_results(
                self.this_trait,
                self.dataset,
                self.samples,
                self.json_data,
                self.num_perm,
                self.bootCheck,
                self.num_bootstrap,
                self.do_control,
                self.control_marker,
                self.manhattan_plot)
        elif self.mapping_method == "plink":
            self.score_type = "-log(p)"
            self.manhattan_plot = True
            results = plink_mapping.run_plink(self.this_trait, self.dataset, self.species, self.vals, self.maf)
        elif self.mapping_method == "pylmm":
            logger.debug("RUNNING PYLMM")
            self.dataset.group.genofile = start_vars['genofile']
            if self.num_perm > 0:
                self.run_permutations(str(temp_uuid))
            results = self.gen_data(str(temp_uuid))
        else:
            logger.debug("RUNNING NOTHING")

        # NOTE(review): the chromosome comparisons below mix numbers and the
        # strings "X" / "X/Y"; that ordering only works on Python 2 - confirm
        # the target interpreter before porting.
        if self.pair_scan:
            self.qtl_results = []
            highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr1'] > 0 or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                    if marker['chr1'] > highest_chr or marker['chr1'] == "X" or marker['chr1'] == "X/Y":
                        highest_chr = marker['chr1']
                    if 'lod_score' in marker.keys():
                        self.qtl_results.append(marker)

            self.trimmed_markers = results

            # These lists are not created anywhere earlier in this method, so
            # make sure they exist before appending to them.
            for json_key in ('chr1', 'chr2', 'Mb', 'markernames'):
                self.json_data.setdefault(json_key, [])

            # Iterate over the markers themselves: wrapping the list in
            # enumerate() would yield (index, marker) tuples, which cannot be
            # indexed by key.
            for qtl in self.qtl_results:
                self.json_data['chr1'].append(str(qtl['chr1']))
                self.json_data['chr2'].append(str(qtl['chr2']))
                self.json_data['Mb'].append(qtl['Mb'])
                self.json_data['markernames'].append(qtl['name'])

            self.js_data = dict(
                json_data=self.json_data,
                this_trait=self.this_trait.name,
                data_set=self.dataset.name,
                maf=self.maf,
                manhattan_plot=self.manhattan_plot,
                mapping_scale=self.mapping_scale,
                qtl_results=self.qtl_results
            )

        else:
            self.cutoff = 2
            self.qtl_results = []
            highest_chr = 1  # This is needed in order to convert the highest chr to X/Y
            for marker in results:
                if marker['chr'] > 0 or marker['chr'] == "X" or marker['chr'] == "X/Y":
                    if marker['chr'] > highest_chr or marker['chr'] == "X" or marker['chr'] == "X/Y":
                        highest_chr = marker['chr']
                    if ('lod_score' in marker.keys()) or ('lrs_value' in marker.keys()):
                        self.qtl_results.append(marker)

            self.trimmed_markers = trim_markers_for_table(results)

            # For "gemma" no json_data / js_data is assembled here.
            if self.mapping_method != "gemma":
                self.json_data['chr'] = []
                self.json_data['pos'] = []
                self.json_data['lod.hk'] = []
                self.json_data['markernames'] = []

                self.json_data['suggestive'] = self.suggestive
                self.json_data['significant'] = self.significant

                # Need to convert the QTL objects that qtl reaper returns into a json serializable dictionary
                for qtl in self.qtl_results:
                    # The highest numeric chromosome is reported as "X".
                    if qtl['chr'] == highest_chr and highest_chr != "X" and highest_chr != "X/Y":
                        self.json_data['chr'].append("X")
                    else:
                        self.json_data['chr'].append(str(qtl['chr']))
                    self.json_data['pos'].append(qtl['Mb'])
                    if 'lrs_value' in qtl.keys():
                        self.json_data['lod.hk'].append(str(qtl['lrs_value']))
                    else:
                        self.json_data['lod.hk'].append(str(qtl['lod_score']))
                    self.json_data['markernames'].append(qtl['name'])

                # Get chromosome lengths for drawing the interval map plot
                chromosome_mb_lengths = {}
                self.json_data['chrnames'] = []
                for key in self.species.chromosomes.chromosomes.keys():
                    self.json_data['chrnames'].append([self.species.chromosomes.chromosomes[key].name, self.species.chromosomes.chromosomes[key].mb_length])
                    chromosome_mb_lengths[key] = self.species.chromosomes.chromosomes[key].mb_length

                self.js_data = dict(
                    result_score_type=self.score_type,
                    json_data=self.json_data,
                    this_trait=self.this_trait.name,
                    data_set=self.dataset.name,
                    maf=self.maf,
                    manhattan_plot=self.manhattan_plot,
                    mapping_scale=self.mapping_scale,
                    chromosomes=chromosome_mb_lengths,
                    qtl_results=self.qtl_results,
                    num_perm=self.num_perm,
                    perm_results=self.perm_output,
                )
Beispiel #49
0
    def run_analysis(self, requestform):
        """Run a CTL scan for the traits named in ``requestform``.

        ``requestform`` must provide ``trait_list`` (comma separated
        ``name:dataset`` entries) and is also read for ``strategy``,
        ``nperm``, ``parametric`` and ``significance``.  The genotype file of
        the first trait's dataset group is parsed, genotype and phenotype
        matrices are handed to the R CTL functions, one line plot plus one
        plot per trait are written as PNG files, and the significant
        interactions are turned into nodes and edges.

        Results are stored on the instance: ``self.trait_db_list``,
        ``self.results`` and ``self.elements`` (a JSON string of
        ``self.nodes_list + self.edges_list``).
        """
        logger.info("Starting CTL analysis on dataset")
        self.trait_db_list = [
            trait.strip() for trait in requestform['trait_list'].split(',')
        ]
        # Drop empty entries (e.g. produced by a trailing comma).
        self.trait_db_list = [x for x in self.trait_db_list if x]

        logger.debug("strategy:", requestform.get("strategy"))
        strategy = requestform.get("strategy")

        logger.debug("nperm:", requestform.get("nperm"))
        nperm = int(requestform.get("nperm"))

        logger.debug("parametric:", requestform.get("parametric"))
        # NOTE(review): bool() of any non-empty string is True, including
        # "False" - confirm how the form encodes this flag.
        parametric = bool(requestform.get("parametric"))

        logger.debug("significance:", requestform.get("significance"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        logger.debug("dataset group: ", dataset.group)
        # Create a genotype matrix
        individuals = parser.individuals
        markers = []
        markernames = []
        for marker in parser.markers:
            markernames.append(marker["name"])
            markers.append(marker["genotypes"])

        genotypes = list(itertools.chain(*markers))
        logger.debug(
            len(genotypes) / len(individuals), "==", len(parser.markers))

        rGeno = r_t(
            ro.r.matrix(r_unlist(genotypes),
                        nrow=len(markernames),
                        ncol=len(individuals),
                        dimnames=r_list(markernames, individuals),
                        byrow=True))

        # Create a phenotype matrix
        traits = []
        for trait in self.trait_db_list:
            logger.debug("retrieving data for", trait)
            # Empty names were already removed from trait_db_list above, so
            # every entry can be processed unconditionally.
            ts = trait.split(':')
            gt = create_trait(name=ts[0], dataset_name=ts[1])
            gt = retrieve_sample_data(gt, dataset, individuals)
            trait_data = gt.data
            for ind in individuals:
                # Direct membership test on the mapping; no need to build a
                # fresh key list for every individual.
                if ind in trait_data:
                    traits.append(trait_data[ind].value)
                else:
                    # "-999" is the placeholder for an individual without a value.
                    traits.append("-999")

        rPheno = r_t(
            ro.r.matrix(r_as_numeric(r_unlist(traits)),
                        nrow=len(self.trait_db_list),
                        ncol=len(individuals),
                        dimnames=r_list(self.trait_db_list, individuals),
                        byrow=True))

        logger.debug(rPheno)

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno, check_names=False)
        rGeno = r_data_frame(rGeno, check_names=False)

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno,
                             rPheno,
                             strategy=strategy,
                             nperm=nperm,
                             parametric=parametric,
                             nthreads=6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance=significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        # Store the user specified parameters for the output page
        self.results['requestform'] = requestform

        # Create the lineplot
        r_png(self.results['imgloc1'],
              width=1000,
              height=600,
              type='cairo-png')
        self.r_lineplot(res, significance=significance)
        r_dev_off()

        # Image slot 1 is the line plot, so per-trait images are numbered
        # from 2; the R phenotype index passed to the plot call is n - 1.
        for n, trait in enumerate(self.trait_db_list, start=2):
            # Create the QTL like CTL plots
            url_key = 'imgurl' + str(n)
            loc_key = 'imgloc' + str(n)
            self.results[url_key] = webqtlUtil.genRandStr("CTL_") + ".png"
            self.results[loc_key] = GENERATED_IMAGE_DIR + self.results[url_key]
            r_png(self.results[loc_key],
                  width=1000,
                  height=600,
                  type='cairo-png')
            self.r_plotCTLobject(res, (n - 1),
                                 significance=significance,
                                 main='Phenotype ' + trait)
            r_dev_off()

        # Flush any output from R
        sys.stdout.flush()

        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
        if not isinstance(significant, ri.RNULLType):
            for x in range(len(significant[0])):
                logger.debug(significant[0][x], significant[1][x],
                             significant[2][x])  # Debug to console
                tsS = significant[0][x].split(':')  # Source
                tsT = significant[2][x].split(':')  # Target
                # Retrieve Source and Target info from the DB
                gtS = create_trait(name=tsS[0], dataset_name=tsS[1])
                gtT = create_trait(name=tsT[0], dataset_name=tsT[1])
                self.addNode(gtS)
                self.addNode(gtT)
                self.addEdge(gtS, gtT, significant, x)

                # Update the trait names for the displayed table
                significant[0][x] = "{} ({})".format(gtS.symbol, gtS.name)
                significant[2][x] = "{} ({})".format(gtT.symbol, gtT.name)

        self.elements = json.dumps(self.nodes_list + self.edges_list)
Beispiel #50
0
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA

    Parameters:
        this_dataset: dataset whose ``group`` and ``species`` attributes
            select the input files and the chromosome list.
        samples: not used by this function body.
        vals: trait values, forwarded to ``gen_pheno_txt_file``.
        covariates: covariate specification; when it is not the empty string
            a covariates file is generated and passed to GEMMA with ``-c``.
        method: "gemma" selects the ``-bfile`` based command; any other value
            selects the BIMBAM (``-g``/``-p``/``-a``) based commands.
        use_loco: the string "True" selects the LOCO wrapper commands (only
            for methods other than "gemma").

    Returns the marker objects produced by ``parse_loco_output`` when a LOCO
    run was performed, otherwise those produced by ``parse_gemma_output``.
    """

    if this_dataset.group.genofile is not None:
        # Strip the last five characters (presumably a ".geno" style suffix).
        genofile_name = this_dataset.group.genofile[:-5]
    else:
        genofile_name = this_dataset.group.name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Make sure the association output file exists; close the handle right
    # away instead of leaking it.
    assoc_path = "{}{}_output.assoc.txt".format(
        webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
    if not os.path.isfile(assoc_path):
        with open(assoc_path, "w+"):
            pass

    # Chromosomes are looked up by the keys 1..N.
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(
        this_chromosomes[i + 1].name for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    # Only set when a LOCO run actually happens; it decides which parser is
    # used at the end.
    gwa_output_filename = None
    suffix_alphabet = string.ascii_uppercase + string.digits

    # NOTE(review): the commands below are shell strings run via os.system;
    # if genofile/group names can come from user input they should be
    # validated or quoted - confirm against the callers.
    if method == "gemma":
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            flat_files('mapping'), this_dataset.group.name,
            flat_files('mapping'), this_dataset.group.name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                flat_files('mapping'), this_dataset.group.name,
                webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                webqtlConfig.GENERATED_IMAGE_DIR, this_dataset.group.name)
    else:
        if use_loco == "True":
            # Step 1: compute the kinship data into a randomly named JSON file.
            k_output_filename = this_dataset.group.name + "_K_" + ''.join(
                random.choice(suffix_alphabet) for _ in range(6))
            generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                k_output_filename)
            logger.debug("k_command:" + generate_k_command)
            os.system(generate_k_command)

            # Step 2: run the association using the kinship JSON as input.
            gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
                TEMPDIR, k_output_filename, flat_files('genotype/bimbam'),
                genofile_name, flat_files('genotype/bimbam'), genofile_name)

            gwa_output_filename = this_dataset.group.name + "_GWA_" + ''.join(
                random.choice(suffix_alphabet) for _ in range(6))
            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('mapping'), this_dataset.group.name,
                    flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                    gwa_output_filename)
            else:
                gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                    flat_files('genotype/bimbam'), genofile_name, TEMPDIR,
                    gwa_output_filename)

        else:
            gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name,
                flat_files('genotype/bimbam'), genofile_name)

            if covariates != "":
                gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                    flat_files('mapping'), this_dataset.group.name,
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
            else:
                gemma_command += ' -outdir %s -debug -o %s_output' % (
                    webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)

    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    # Pick the parser from what was actually run: with method "gemma" no LOCO
    # output exists even if use_loco is "True", and the LOCO file name would
    # be undefined.
    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs
Beispiel #51
0
def run_gemma(this_dataset, samples, vals, covariates, method, use_loco):
    """Generates p-values for each marker using GEMMA.

    Writes the phenotype file (and the covariates file when ``covariates``
    is non-empty), assembles a GEMMA shell command, runs it with
    ``os.system`` and parses the resulting output.

    Parameters:
        this_dataset: dataset object; ``group.name``, ``group.genofile`` and
            ``species.chromosomes.chromosomes`` are read from it.
        samples: not used by this function; kept for interface compatibility.
        vals: phenotype values, passed through to ``gen_pheno_txt_file``.
        covariates: covariate specification string; ``""`` means none.
        method: ``"gemma"`` selects the ``-bfile`` based command; any other
            value uses the bimbam genotype/phenotype/annotation files.
        use_loco: the *string* ``"True"`` enables the LOCO wrapper commands
            (only honoured when ``method`` is not ``"gemma"``).

    Returns:
        Whatever ``parse_loco_output`` (LOCO run) or ``parse_gemma_output``
        (all other runs) returns.
    """

    group_name = this_dataset.group.name

    if this_dataset.group.genofile is not None:
        # Drop the last five characters (presumably a ".geno" suffix --
        # TODO confirm against the genofile naming convention).
        genofile_name = this_dataset.group.genofile[:-5]
    else:
        genofile_name = group_name

    gen_pheno_txt_file(this_dataset, genofile_name, vals, method)

    # Make sure the association output file exists; create an empty one if
    # it does not, closing the handle immediately instead of leaking it.
    assoc_path = "{}{}_output.assoc.txt".format(webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
    if not os.path.isfile(assoc_path):
        with open(assoc_path, "w+"):
            pass

    # Chromosomes are looked up by the keys 1..N (not 0..N-1).
    this_chromosomes = this_dataset.species.chromosomes.chromosomes
    chr_list_string = ",".join(this_chromosomes[i + 1].name
                               for i in range(len(this_chromosomes)))

    if covariates != "":
        gen_covariates_file(this_dataset, covariates)

    def random_suffix():
        # Six random upper-case letters/digits used to make output names unique.
        return ''.join(random.choice(string.ascii_uppercase + string.digits) for _ in range(6))

    # Only set when a LOCO run is actually performed; decides which parser
    # is used below.
    gwa_output_filename = None

    if method == "gemma":
        mapping_dir = flat_files('mapping')
        gemma_command = GEMMA_COMMAND + ' -bfile %s/%s -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            mapping_dir, group_name, mapping_dir, group_name)
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -o %s_output' % (
                mapping_dir, group_name, webqtlConfig.GENERATED_IMAGE_DIR, group_name)
        else:
            gemma_command += ' -outdir %s -o %s_output' % (
                webqtlConfig.GENERATED_IMAGE_DIR, group_name)
    elif use_loco == "True":
        bimbam_dir = flat_files('genotype/bimbam')

        # Step 1: write the kinship (-gk) result to a JSON file under TEMPDIR/gn2.
        k_output_filename = group_name + "_K_" + random_suffix()
        generate_k_command = GEMMA_WRAPPER_COMMAND + ' --json --loco ' + chr_list_string + ' -- -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -gk -debug > %s/gn2/%s.json' % (
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name,
            TEMPDIR, k_output_filename)
        logger.debug("k_command:" + generate_k_command)
        os.system(generate_k_command)

        # Step 2: run the association, feeding the JSON from step 1 via --input.
        gemma_command = GEMMA_WRAPPER_COMMAND + ' --json --loco --input %s/gn2/%s.json -- -g %s/%s_geno.txt -p %s/%s_pheno.txt' % (
            TEMPDIR, k_output_filename,
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name)

        gwa_output_filename = group_name + "_GWA_" + random_suffix()
        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                flat_files('mapping'), group_name,
                bimbam_dir, genofile_name,
                TEMPDIR, gwa_output_filename)
        else:
            gemma_command += ' -a %s/%s_snps.txt -lmm 1 -maf 0.1 -debug > %s/gn2/%s.json' % (
                bimbam_dir, genofile_name,
                TEMPDIR, gwa_output_filename)
    else:
        bimbam_dir = flat_files('genotype/bimbam')
        gemma_command = GEMMA_COMMAND + ' -g %s/%s_geno.txt -p %s/%s_pheno.txt -a %s/%s_snps.txt -k %s/%s.cXX.txt -lmm 1 -maf 0.1' % (
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name,
            bimbam_dir, genofile_name)

        if covariates != "":
            gemma_command += ' -c %s/%s_covariates.txt -outdir %s -debug -o %s_output' % (
                flat_files('mapping'), group_name,
                webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)
        else:
            gemma_command += ' -outdir %s -debug -o %s_output' % (
                webqtlConfig.GENERATED_IMAGE_DIR, genofile_name)

    # NOTE(review): the command is a shell string built from dataset/group
    # names and executed through the shell (the LOCO variants rely on ">"
    # redirection). Confirm those names can never carry untrusted input.
    logger.debug("gemma_command:" + gemma_command)
    os.system(gemma_command)

    # Previously method == "gemma" combined with use_loco == "True" reached
    # parse_loco_output with an unassigned gwa_output_filename (NameError);
    # the LOCO parser is now used only when a LOCO run produced that file.
    if gwa_output_filename is not None:
        marker_obs = parse_loco_output(this_dataset, gwa_output_filename)
    else:
        marker_obs = parse_gemma_output(genofile_name)

    return marker_obs