Ejemplo n.º 1
0
def create_trait(**kw):
    assert bool(kw.get('dataset')) != bool(
        kw.get('dataset_name')), "Needs dataset ob. or name"

    assert bool(kw.get('name')), "Needs trait name"

    if bool(kw.get('dataset')):
        dataset = kw.get('dataset')
    else:
        if kw.get('dataset_name') != "Temp":
            dataset = create_dataset(kw.get('dataset_name'))
        else:
            dataset = create_dataset("Temp", group_name=kw.get('group_name'))

    if dataset.type == 'Publish':
        permissions = check_resource_availability(dataset, kw.get('name'))
    else:
        permissions = check_resource_availability(dataset)

    if permissions['data'] != "no-access":
        the_trait = GeneralTrait(**kw)
        if the_trait.dataset.type != "Temp":
            the_trait = retrieve_trait_info(
                the_trait,
                the_trait.dataset,
                get_qtl_info=kw.get('get_qtl_info'))
        return the_trait
    else:
        return None
Ejemplo n.º 2
0
def view_collection():
    params = request.args

    uc_id = params['uc_id']
    uc = (collection for collection in g.user_session.user_collections
          if collection["id"] == uc_id).next()
    traits = uc["members"]

    trait_obs = []
    json_version = []

    for atrait in traits:
        name, dataset_name = atrait.split(':')
        if dataset_name == "Temp":
            group = name.split("_")[2]
            dataset = create_dataset(dataset_name,
                                     dataset_type="Temp",
                                     group_name=group)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
        else:
            dataset = create_dataset(dataset_name)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
            trait_ob = trait.retrieve_trait_info(trait_ob,
                                                 dataset,
                                                 get_qtl_info=True)
        trait_obs.append(trait_ob)

        json_version.append(trait.jsonable(trait_ob))

    collection_info = dict(trait_obs=trait_obs, uc=uc)

    if "json" in params:
        return json.dumps(json_version)
    else:
        return render_template("collections/view.html", **collection_info)
Ejemplo n.º 3
0
def loading_page():
    # logger.info(request.url)
    initial_start_vars = request.form
    start_vars_container = {}
    n_samples = 0  # ZS: So it can be displayed on loading page
    if 'wanted_inputs' in initial_start_vars:
        wanted = initial_start_vars['wanted_inputs'].split(",")
        start_vars = {}
        for key, value in list(initial_start_vars.items()):
            if key in wanted:
                start_vars[key] = value

        sample_vals_dict = json.loads(start_vars['sample_vals'])
        if 'n_samples' in start_vars:
            n_samples = int(start_vars['n_samples'])
        else:
            if 'group' in start_vars:
                dataset = create_dataset(start_vars['dataset'],
                                         group_name=start_vars['group'])
            else:
                dataset = create_dataset(start_vars['dataset'])
            samples = dataset.group.samplelist
            if 'genofile' in start_vars:
                if start_vars['genofile'] != "":
                    genofile_string = start_vars['genofile']
                    dataset.group.genofile = genofile_string.split(":")[0]
                    genofile_samples = run_mapping.get_genofile_samplelist(
                        dataset)
                    if len(genofile_samples) > 1:
                        samples = genofile_samples

            for sample in samples:
                if sample in sample_vals_dict:
                    if sample_vals_dict[sample] != "x":
                        n_samples += 1

        start_vars['n_samples'] = n_samples
        start_vars['vals_hash'] = generate_hash_of_string(
            str(sample_vals_dict))
        if start_vars[
                'dataset'] != "Temp":  # Currently can't get diff for temp traits
            start_vars['vals_diff'] = get_diff_of_vals(
                sample_vals_dict,
                str(start_vars['trait_id'] + ":" + str(start_vars['dataset'])))

        start_vars['wanted_inputs'] = initial_start_vars['wanted_inputs']

        start_vars_container['start_vars'] = start_vars
    else:
        start_vars_container['start_vars'] = initial_start_vars

    rendered_template = render_template("loading.html", **start_vars_container)

    return rendered_template
Ejemplo n.º 4
0
def do_correlation(start_vars):
    assert ('db' in start_vars)
    assert ('target_db' in start_vars)
    assert ('trait_id' in start_vars)

    this_dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    target_dataset = data_set.create_dataset(
        dataset_name=start_vars['target_db'])
    this_trait = GeneralTrait(dataset=this_dataset,
                              name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, this_dataset)

    corr_params = init_corr_params(start_vars)

    corr_results = calculate_results(this_trait, this_dataset, target_dataset,
                                     corr_params)
    #corr_results = collections.OrderedDict(sorted(corr_results.items(), key=lambda t: -abs(t[1][0])))

    final_results = []
    for _trait_counter, trait in enumerate(
            corr_results.keys()[:corr_params['return_count']]):
        if corr_params['type'] == "tissue":
            [sample_r, num_overlap, sample_p, symbol] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p,
                "symbol": symbol
            }
        elif corr_params['type'] == "literature" or corr_params[
                'type'] == "lit":
            [gene_id, sample_r] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "gene_id": gene_id
            }
        else:
            [sample_r, sample_p, num_overlap] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p
            }

        final_results.append(result_dict)

    # json_corr_results = generate_corr_json(final_corr_results, this_trait, this_dataset, target_dataset, for_api = True)

    return final_results
Ejemplo n.º 5
0
    def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
        # xor assertion
        assert bool(kw.get('dataset')) != bool(
            kw.get('dataset_name')), "Needs dataset ob. or name"
        # Trait ID, ProbeSet ID, Published ID, etc.
        self.name = kw.get('name')
        if kw.get('dataset_name'):
            if kw.get('dataset_name') == "Temp":
                temp_group = self.name.split("_")[2]
                self.dataset = create_dataset(dataset_name="Temp",
                                              dataset_type="Temp",
                                              group_name=temp_group)
            else:
                self.dataset = create_dataset(kw.get('dataset_name'))
        else:
            self.dataset = kw.get('dataset')
        self.cellid = kw.get('cellid')
        self.identification = kw.get('identification', 'un-named trait')
        self.haveinfo = kw.get('haveinfo', False)
        # Blat sequence, available for ProbeSet
        self.sequence = kw.get('sequence')
        self.data = kw.get('data', {})
        self.view = True

        # Sets defaults
        self.locus = None
        self.lrs = None
        self.pvalue = None
        self.mean = None
        self.additive = None
        self.num_overlap = None
        self.strand_probe = None
        self.symbol = None
        self.display_name = self.name

        self.LRS_score_repr = "N/A"
        self.LRS_location_repr = "N/A"

        if kw.get('fullname'):
            name2 = value.split("::")
            if len(name2) == 2:
                self.dataset, self.name = name2
                # self.cellid is set to None above
            elif len(name2) == 3:
                self.dataset, self.name, self.cellid = name2

        # Todo: These two lines are necessary most of the time, but
        # perhaps not all of the time So we could add a simple if
        # statement to short-circuit this if necessary
        if get_sample_info is not False:
            self = retrieve_sample_data(self, self.dataset)
def view_collection():
    params = request.args
    print("PARAMS in view collection:", params)

    if "uc_id" in params:
        uc_id = params['uc_id']
        uc = model.UserCollection.query.get(uc_id)
        traits = json.loads(uc.members)
    else:
        user_collections = json.loads(Redis.get(user_manager.AnonUser().key))
        this_collection = {}
        for collection in user_collections:
            if collection['id'] == params['collection_id']:
                this_collection = collection
                break
        #this_collection = user_collections[params['collection_id']]
        traits = this_collection['members']

    print("in view_collection traits are:", traits)

    trait_obs = []
    json_version = []

    for atrait in traits:
        name, dataset_name = atrait.split(':')
        if dataset_name == "Temp":
            group = name.split("_")[2]
            dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
        else:
            dataset = create_dataset(dataset_name)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
            trait_ob = trait.retrieve_trait_info(trait_ob, dataset, get_qtl_info=True)
        trait_obs.append(trait_ob)

        json_version.append(trait.jsonable(trait_ob))

    if "uc_id" in params:
        collection_info = dict(trait_obs=trait_obs,
                               uc = uc)
    else:
        collection_info = dict(trait_obs=trait_obs,
                               collection_name=this_collection['name'])
    if "json" in params:
        print("json_version:", json_version)
        return json.dumps(json_version)
    else:
        return render_template("collections/view.html",
                           **collection_info
                           )
Ejemplo n.º 7
0
    def __init__(self, get_qtl_info=False, **kw):
        # xor assertion
        assert bool(kw.get("dataset")) != bool(kw.get("dataset_name")), "Needs dataset ob. or name"
        if kw.get("dataset_name"):
            self.dataset = create_dataset(kw.get("dataset_name"))
            print(" in GeneralTrait created dataset:", self.dataset)
        else:
            self.dataset = kw.get("dataset")
        self.name = kw.get("name")  # Trait ID, ProbeSet ID, Published ID, etc.
        self.cellid = kw.get("cellid")
        self.identification = kw.get("identification", "un-named trait")
        self.haveinfo = kw.get("haveinfo", False)
        self.sequence = kw.get("sequence")  # Blat sequence, available for ProbeSet
        self.data = kw.get("data", {})

        # Sets defaults
        self.locus = None
        self.lrs = None
        self.pvalue = None
        self.mean = None
        self.num_overlap = None

        if kw.get("fullname"):
            name2 = value.split("::")
            if len(name2) == 2:
                self.dataset, self.name = name2
                # self.cellid is set to None above
            elif len(name2) == 3:
                self.dataset, self.name, self.cellid = name2

        # Todo: These two lines are necessary most of the time, but perhaps not all of the time
        # So we could add a simple if statement to short-circuit this if necessary
        self.retrieve_info(get_qtl_info=get_qtl_info)
        self.retrieve_sample_data()
Ejemplo n.º 8
0
def gen_covariates_file(this_dataset, covariates):
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        #trait_samples = this_dataset.group.all_samples_ordered()
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", trait_samples)
        trait_sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", trait_sample_data)
        for index, sample in enumerate(trait_samples):
            if sample in trait_sample_data:
                sample_value = trait_sample_data[sample].value
                this_covariate_data.append(sample_value)
            else:
                this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)

    with open("{}/{}_covariates.txt".format(flat_files('mapping'), this_dataset.group.name), "w") as outfile:
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")
Ejemplo n.º 9
0
def create_target_this_trait(start_vars):
    """this function creates the required trait and target dataset for correlation"""

    if start_vars['dataset'] == "Temp":
        this_dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=start_vars['group'])
    else:
        this_dataset = data_set.create_dataset(
            dataset_name=start_vars['dataset'])
    target_dataset = data_set.create_dataset(
        dataset_name=start_vars['corr_dataset'])
    this_trait = create_trait(dataset=this_dataset,
                              name=start_vars['trait_id'])
    sample_data = ()
    return (this_dataset, this_trait, target_dataset, sample_data)
Ejemplo n.º 10
0
    def __init__(self, start_vars):
        self.dataset1 = data_set.create_dataset(start_vars["dataset1"])
        self.trait1 = GeneralTrait(dataset=self.dataset1.name, name=start_vars["trait1"])

        self.dataset2 = data_set.create_dataset(start_vars["dataset2"])
        self.trait2 = GeneralTrait(dataset=self.dataset2.name, name=start_vars["trait2"])

        sample_names_1 = self.get_sample_names(self.dataset1)
        sample_names_2 = self.get_sample_names(self.dataset2)

        self.samples_1 = self.get_samples(self.dataset1, sample_names_1, self.trait1)
        self.samples_2 = self.get_samples(self.dataset2, sample_names_2, self.trait2)

        coords = {}
        for sample in self.samples_1:
            coords[sample.name] = sample.val
Ejemplo n.º 11
0
def gen_covariates_file(this_dataset, covariates, samples):
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = create_trait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        trait_sample_data = trait_ob.data
        for index, sample in enumerate(trait_samples):
            if sample in samples:
                if sample in trait_sample_data:
                    sample_value = trait_sample_data[sample].value
                    this_covariate_data.append(sample_value)
                else:
                    this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)

    with open((f"{flat_files('mapping')}/"
               f"{this_dataset.group.name}_covariates.txt"), "w") as outfile:
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")
Ejemplo n.º 12
0
def gen_covariates_file(this_dataset, covariates):
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        #trait_samples = this_dataset.group.all_samples_ordered()
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        logger.debug("SAMPLES:", trait_samples)
        trait_sample_data = trait_ob.data
        logger.debug("SAMPLE DATA:", trait_sample_data)
        for index, sample in enumerate(trait_samples):
            if sample in trait_sample_data:
                sample_value = trait_sample_data[sample].value
                this_covariate_data.append(sample_value)
            else:
                this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)

    with open(
            "{}/{}_covariates.txt".format(flat_files('mapping'),
                                          this_dataset.group.name),
            "w") as outfile:
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")
Ejemplo n.º 13
0
    def run_analysis(self, requestform):
        print("Starting ePheWAS analysis on dataset")
        genofilelocation = locate("BXD.geno", "genotype")                                  # Get the location of the BXD genotypes
        tissuealignerloc = locate("Tissue_color_aligner.csv", "auwerx")                       # Get the location of the Tissue_color_aligner

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)

        # Print some debug
        print "self.trait_id:" + self.trait_id + "\n"
        print "self.datasetname:" + self.datasetname + "\n"
        print "self.dataset.type:" + self.dataset.type + "\n"

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        snpinfo = []
        for marker in parser.markers:
          snpinfo.append(marker["name"]);
          snpinfo.append(marker["chr"]);
          snpinfo.append(marker["Mb"]);

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo, nrow=len(parser.markers), dimnames = r_list(rnames, r_c("SNP", "Chr", "Pos")), ncol = 3, byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        print("Initialization of ePheWAS done !")
Ejemplo n.º 14
0
def loading_page():
    logger.info(request.url)
    initial_start_vars = request.form
    start_vars_container = {}
    n_samples = 0  #ZS: So it can be displayed on loading page
    if 'wanted_inputs' in initial_start_vars:
        wanted = initial_start_vars['wanted_inputs'].split(",")
        start_vars = {}
        for key, value in list(initial_start_vars.items()):
            if key in wanted:
                start_vars[key] = value

        if 'n_samples' in start_vars:
            n_samples = int(start_vars['n_samples'])
        else:
            sample_vals_dict = json.loads(start_vars['sample_vals'])
            if 'group' in start_vars:
                dataset = create_dataset(start_vars['dataset'],
                                         group_name=start_vars['group'])
            else:
                dataset = create_dataset(start_vars['dataset'])
            genofile_samplelist = []
            samples = start_vars['primary_samples'].split(",")
            if 'genofile' in start_vars:
                if start_vars['genofile'] != "":
                    genofile_string = start_vars['genofile']
                    dataset.group.genofile = genofile_string.split(":")[0]
                    genofile_samples = run_mapping.get_genofile_samplelist(
                        dataset)
                    if len(genofile_samples) > 1:
                        samples = genofile_samples

            for sample in samples:
                if sample in sample_vals_dict:
                    if sample_vals_dict[sample] != "x":
                        n_samples += 1

        start_vars['n_samples'] = n_samples
        start_vars['wanted_inputs'] = initial_start_vars['wanted_inputs']

        start_vars_container['start_vars'] = start_vars
    else:
        start_vars_container['start_vars'] = initial_start_vars

    rendered_template = render_template("loading.html", **start_vars_container)

    return rendered_template
    def __init__(self, kw):
        """
            This class gets invoked after hitting submit on the main menu (in
            views.py).
        """

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        self.uc_id = uuid.uuid4()
        self.go_term = None
        logger.debug("uc_id:", self.uc_id)  # contains a unique id

        logger.debug("kw is:", kw)  # dict containing search terms
        if kw['search_terms_or']:
            self.and_or = "or"
            self.search_terms = kw['search_terms_or']
        else:
            self.and_or = "and"
            self.search_terms = kw['search_terms_and']
        search = self.search_terms
        self.original_search_string = self.search_terms
        # check for dodgy search terms
        rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*',
                        re.IGNORECASE)
        if rx.match(search):
            logger.info("Regex failed search")
            self.search_term_exists = False
            return
        else:
            self.search_term_exists = True

        self.results = []
        type = kw.get('type')
        if type == "Phenotypes":  # split datatype on type field
            dataset_type = "Publish"
        elif type == "Genotypes":
            dataset_type = "Geno"
        else:
            dataset_type = "ProbeSet"  # ProbeSet is default

        assert (is_str(kw.get('dataset')))
        self.dataset = create_dataset(kw['dataset'], dataset_type)
        logger.debug("search_terms:", self.search_terms)

        #ZS: I don't like using try/except, but it seems like the easiest way to account for all possible bad searches here
        try:
            self.search()
        except:
            self.search_term_exists = False

        self.too_many_results = False
        if self.search_term_exists:
            if len(self.results) > 50000:
                self.trait_list = []
                self.too_many_results = True
            else:
                self.gen_search_result()
Ejemplo n.º 16
0
def view_collection():
    params = request.args

    if g.user_session.logged_in and "uc_id" in params:
        uc_id = params['uc_id']
        uc = (collection for collection in g.user_session.user_collections if collection["id"] == uc_id).next()
        traits = uc["members"]
    else:
        user_collections = json.loads(Redis.get(user_manager.AnonUser().key))
        this_collection = {}
        for collection in user_collections:
            if collection['id'] == params['collection_id']:
                this_collection = collection
                break

        traits = this_collection['members']

    trait_obs = []
    json_version = []

    for atrait in traits:
        name, dataset_name = atrait.split(':')
        if dataset_name == "Temp":
            group = name.split("_")[2]
            dataset = create_dataset(dataset_name, dataset_type = "Temp", group_name = group)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
        else:
            dataset = create_dataset(dataset_name)
            trait_ob = trait.GeneralTrait(name=name, dataset=dataset)
            trait_ob = trait.retrieve_trait_info(trait_ob, dataset, get_qtl_info=True)
        trait_obs.append(trait_ob)

        json_version.append(trait.jsonable(trait_ob))

    if "uc_id" in params:
        collection_info = dict(trait_obs=trait_obs,
                               uc = uc)
    else:
        collection_info = dict(trait_obs=trait_obs,
                               collection_name=this_collection['name'])
    if "json" in params:
        return json.dumps(json_version)
    else:
        return render_template("collections/view.html",
                           **collection_info
                           )
def geno_db_exists(this_dataset):
    geno_db_name = this_dataset.group.name + "Geno"
    try:
        geno_db = data_set.create_dataset(dataset_name=geno_db_name,
                                          get_samplelist=False)
        return "True"
    except:
        return "False"
Ejemplo n.º 18
0
def jsonable_table_row(trait, dataset_name, index):
    """Return a list suitable for json and intended to be displayed in a table

    Actual turning into json doesn't happen here though"""

    dataset = create_dataset(dataset_name)
    
    if dataset.type == "ProbeSet":
        if trait.mean == "":
            mean = "N/A"
        else:
            mean = "%.3f" % round(float(trait.mean), 2)
        if trait.additive == "":
            additive = "N/A"
        else:
            additive = "%.3f" % round(float(trait.additive), 2)
        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                index,
                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                trait.symbol,
                trait.description_display,
                trait.location_repr,
                mean, 
                trait.LRS_score_repr,
                trait.LRS_location_repr,
                additive]
    elif dataset.type == "Publish":
        if trait.additive == "":
            additive = "N/A"
        else:
            additive = "%.2f" % round(float(trait.additive), 2)
        if trait.pubmed_id:
            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                    index,
                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                    trait.description_display,
                    trait.authors,
                    '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
                    trait.LRS_score_repr,
                    trait.LRS_location_repr,
                    additive]
        else:
            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                    index,
                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                    trait.description_display,
                    trait.authors,
                    trait.pubmed_text,
                    trait.LRS_score_repr,
                    trait.LRS_location_repr,
                    additive]
    elif dataset.type == "Geno":
        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + user_manager.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                index,
                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                trait.location_repr]
    else:
        return dict()
Ejemplo n.º 19
0
def jsonable_table_row(trait, dataset_name, index):
    """Return a list suitable for json and intended to be displayed in a table

    Actual turning into json doesn't happen here though"""

    dataset = create_dataset(dataset_name)
    
    if dataset.type == "ProbeSet":
        if trait.mean == "":
            mean = "N/A"
        else:
            mean = "%.3f" % round(float(trait.mean), 2)
        if trait.additive == "":
            additive = "N/A"
        else:
            additive = "%.3f" % round(float(trait.additive), 2)
        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                index,
                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                trait.symbol,
                trait.description_display,
                trait.location_repr,
                mean, 
                trait.LRS_score_repr,
                trait.LRS_location_repr,
                additive]
    elif dataset.type == "Publish":
        if trait.additive == "":
            additive = "N/A"
        else:
            additive = "%.2f" % round(float(trait.additive), 2)
        if trait.pubmed_id:
            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                    index,
                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                    trait.description_display,
                    trait.authors,
                    '<a href="' + trait.pubmed_link + '">' + trait.pubmed_text + '</href>',
                    trait.LRS_score_repr,
                    trait.LRS_location_repr,
                    additive]
        else:
            return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                    index,
                    '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                    trait.description_display,
                    trait.authors,
                    trait.pubmed_text,
                    trait.LRS_score_repr,
                    trait.LRS_location_repr,
                    additive]
    elif dataset.type == "Geno":
        return ['<input type="checkbox" name="searchResult" class="checkbox trait_checkbox" value="' + hmac.data_hmac('{}:{}'.format(str(trait.name), dataset.name)) + '">',
                index,
                '<a href="/show_trait?trait_id='+str(trait.name)+'&dataset='+dataset.name+'">'+str(trait.name)+'</a>',
                trait.location_repr]
    else:
        return dict()
Ejemplo n.º 20
0
    def __init__(self, kw):
        """This class gets invoked after hitting submit on the main menu (in
views.py).

        """

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.uc_id = uuid.uuid4()
        logger.debug("uc_id:", self.uc_id)  # contains a unique id

        logger.debug("kw is:", kw)  # dict containing search terms
        if kw['search_terms_or']:
            self.and_or = "or"
            self.search_terms = kw['search_terms_or']
        else:
            self.and_or = "and"
            self.search_terms = kw['search_terms_and']
        search = self.search_terms
        # check for dodgy search terms
        rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*',
                        re.IGNORECASE)
        if rx.match(search):
            logger.info("Regex failed search")
            self.search_term_exists = False
            return
        else:
            self.search_term_exists = True

        self.results = []
        type = kw.get('type')
        if type == "Phenotypes":  # split datatype on type field
            dataset_type = "Publish"
        elif type == "Genotypes":
            dataset_type = "Geno"
        else:
            dataset_type = "ProbeSet"  # ProbeSet is default

        assert (is_str(kw.get('dataset')))
        self.dataset = create_dataset(kw['dataset'], dataset_type)
        logger.debug("search_terms:", self.search_terms)
        self.search()
        if self.search_term_exists:
            self.gen_search_result()
Ejemplo n.º 21
0
def do_correlation(start_vars):
    assert('db' in start_vars)
    assert('target_db' in start_vars)
    assert('trait_id' in start_vars)

    this_dataset = data_set.create_dataset(dataset_name=start_vars['db'])
    target_dataset = data_set.create_dataset(
        dataset_name=start_vars['target_db'])
    this_trait = create_trait(dataset=this_dataset,
                              name=start_vars['trait_id'])
    this_trait = retrieve_sample_data(this_trait, this_dataset)

    corr_params = init_corr_params(start_vars)

    corr_results = calculate_results(
        this_trait, this_dataset, target_dataset, corr_params)

    final_results = []
    for _trait_counter, trait in enumerate(list(corr_results.keys())[:corr_params['return_count']]):
        if corr_params['type'] == "tissue":
            [sample_r, num_overlap, sample_p, symbol] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p,
                "symbol": symbol
            }
        elif corr_params['type'] == "literature" or corr_params['type'] == "lit":
            [gene_id, sample_r] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "gene_id": gene_id
            }
        else:
            [sample_r, sample_p, num_overlap] = corr_results[trait]
            result_dict = {
                "trait": trait,
                "sample_r": sample_r,
                "#_strains": num_overlap,
                "p_value": sample_p
            }
        final_results.append(result_dict)
    return final_results
Ejemplo n.º 22
0
    def __init__(self, kw):
        """This class gets invoked after hitting submit on the main menu (in
views.py).

        """

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.uc_id = uuid.uuid4()
        logger.debug("uc_id:", self.uc_id) # contains a unique id

        logger.debug("kw is:", kw)         # dict containing search terms
        if kw['search_terms_or']:
            self.and_or = "or"
            self.search_terms = kw['search_terms_or']
        else:
            self.and_or = "and"
            self.search_terms = kw['search_terms_and']
        search = self.search_terms
        # check for dodgy search terms
        rx = re.compile(r'.*\W(href|http|sql|select|update)\W.*',re.IGNORECASE)
        if rx.match(search):
            logger.info("Regex failed search")
            self.search_term_exists = False
            return
        else:
            self.search_term_exists = True

        self.results = []
        type = kw.get('type')
        if type == "Phenotypes":     # split datatype on type field
            dataset_type = "Publish"
        elif type == "Genotypes":
            dataset_type = "Geno"
        else:
            dataset_type = "ProbeSet"      # ProbeSet is default

        assert(is_str(kw.get('dataset')))
        self.dataset = create_dataset(kw['dataset'], dataset_type)
        logger.debug("search_terms:", self.search_terms)
        self.search()
        if self.search_term_exists:
            self.gen_search_result()
Ejemplo n.º 23
0
 def get_trait_db_obs(self, trait_db_list):
     self.trait_list = []
     for i, trait_db in enumerate(trait_db_list):
         if i == (len(trait_db_list) - 1):
             break
         trait_name, dataset_name = trait_db.split(":")
         dataset_ob = data_set.create_dataset(dataset_name)
         trait_ob = GeneralTrait(dataset=dataset_ob,
                                 name=trait_name,
                                 cellid=None)
         self.trait_list.append((trait_ob, dataset_ob))
Ejemplo n.º 24
0
 def get_trait_db_obs(self, trait_db_list):
     self.trait_list = []
     for i, trait_db in enumerate(trait_db_list):
         if i == (len(trait_db_list) - 1):
             break
         trait_name, dataset_name = trait_db.split(":")
         dataset_ob = data_set.create_dataset(dataset_name)
         trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)
         self.trait_list.append((trait_ob, dataset_ob))
def set_template_vars(start_vars, correlation_data):
    corr_type = start_vars['corr_type']
    corr_method = start_vars['corr_sample_method']

    if start_vars['dataset'] == "Temp":
        this_dataset_ob = create_dataset(dataset_name="Temp",
                                         dataset_type="Temp",
                                         group_name=start_vars['group'])
    else:
        this_dataset_ob = create_dataset(dataset_name=start_vars['dataset'])
    this_trait = create_trait(dataset=this_dataset_ob,
                              name=start_vars['trait_id'])

    correlation_data['this_trait'] = jsonable(this_trait, this_dataset_ob)
    correlation_data['this_dataset'] = this_dataset_ob.as_dict()

    target_dataset_ob = create_dataset(correlation_data['target_dataset'])
    correlation_data['target_dataset'] = target_dataset_ob.as_dict()

    table_json = correlation_json_for_table(correlation_data,
                                            correlation_data['this_trait'],
                                            correlation_data['this_dataset'],
                                            target_dataset_ob)

    correlation_data['table_json'] = table_json

    if target_dataset_ob.type == "ProbeSet":
        filter_cols = [7, 6]
    elif target_dataset_ob.type == "Publish":
        filter_cols = [6, 0]
    else:
        filter_cols = [4, 0]

    correlation_data['corr_method'] = corr_method
    correlation_data['filter_cols'] = filter_cols
    correlation_data['header_fields'] = get_header_fields(
        target_dataset_ob.type, correlation_data['corr_method'])
    correlation_data['formatted_corr_type'] = get_formatted_corr_type(
        corr_type, corr_method)

    return correlation_data
Ejemplo n.º 26
0
def jsonable(trait):
    """Return a dict suitable for using as json

    Actual turning into json doesn't happen here though"""

    dataset = create_dataset(dataset_name = trait.dataset.name, dataset_type = trait.dataset.type, group_name = trait.dataset.group.name)
    
    if dataset.type == "ProbeSet":
        return dict(name=trait.name,
                    symbol=trait.symbol,
                    dataset=dataset.name,
                    dataset_name = dataset.shortname,
                    description=trait.description_display,
                    mean=trait.mean,
                    location=trait.location_repr,
                    lrs_score=trait.LRS_score_repr,
                    lrs_location=trait.LRS_location_repr,
                    additive=trait.additive
                    )
    elif dataset.type == "Publish":
        if trait.pubmed_id:
            return dict(name=trait.name,
                        dataset=dataset.name,
                        dataset_name = dataset.shortname,
                        description=trait.description_display,
                        abbreviation=trait.abbreviation,
                        authors=trait.authors,
                        pubmed_text=trait.pubmed_text,
                        pubmed_link=trait.pubmed_link,
                        lrs_score=trait.LRS_score_repr,
                        lrs_location=trait.LRS_location_repr,
                        additive=trait.additive
                        )
        else:
            return dict(name=trait.name,
                        dataset=dataset.name,
                        dataset_name = dataset.shortname,
                        description=trait.description_display,
                        abbreviation=trait.abbreviation,
                        authors=trait.authors,
                        pubmed_text=trait.pubmed_text,
                        lrs_score=trait.LRS_score_repr,
                        lrs_location=trait.LRS_location_repr,
                        additive=trait.additive
                        )
    elif dataset.type == "Geno":
        return dict(name=trait.name,
                    dataset=dataset.name,
                    dataset_name = dataset.shortname,
                    location=trait.location_repr
                    )
    else:
        return dict()
Ejemplo n.º 27
0
def view_collection():
    params = request.args

    uc_id = params['uc_id']
    uc = next(
        (collection for collection in g.user_session.user_collections if collection["id"] == uc_id))
    traits = uc["members"]

    trait_obs = []
    json_version = []

    for atrait in traits:
        if ':' not in atrait:
            continue
        name, dataset_name = atrait.split(':')
        if dataset_name == "Temp":
            group = name.split("_")[2]
            dataset = create_dataset(
                dataset_name, dataset_type="Temp", group_name=group)
            trait_ob = create_trait(name=name, dataset=dataset)
        else:
            dataset = create_dataset(dataset_name)
            trait_ob = create_trait(name=name, dataset=dataset)
            trait_ob = retrieve_trait_info(
                trait_ob, dataset, get_qtl_info=True)
        trait_obs.append(trait_ob)

        json_version.append(jsonable(trait_ob))

    collection_info = dict(
        trait_obs=trait_obs,
        uc=uc,
        heatmap_data_url=f"{GN_SERVER_URL}heatmaps/clustered")

    if "json" in params:
        return json.dumps(json_version)
    else:
        return render_template("collections/view.html",
                               **collection_info
                               )
Ejemplo n.º 28
0
def gen_covariates_file(this_dataset, covariates, samples):
    covariate_list = covariates.split(",")
    covariate_data_object = []
    for covariate in covariate_list:
        this_covariate_data = []
        trait_name = covariate.split(":")[0]
        dataset_name = covariate.split(":")[1]
        if dataset_name == "Temp":
            temp_group = trait_name.split("_")[2]
            dataset_ob = create_dataset(dataset_name="Temp",
                                        dataset_type="Temp",
                                        group_name=temp_group)
        else:
            dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = create_trait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)
        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        trait_sample_data = trait_ob.data
        for index, sample in enumerate(trait_samples):
            if sample in samples:
                if sample in trait_sample_data:
                    sample_value = trait_sample_data[sample].value
                    this_covariate_data.append(sample_value)
                else:
                    this_covariate_data.append("-9")
        covariate_data_object.append(this_covariate_data)

    filename = "COVAR_" + generate_hash_of_string(
        this_dataset.name + str(covariate_data_object)).replace("/", "_")

    with open((f"{flat_files('mapping')}/" f"{filename}.txt"), "w") as outfile:
        for i in range(len(covariate_data_object[0])):
            for this_covariate in covariate_data_object:
                outfile.write(str(this_covariate[i]) + "\t")
            outfile.write("\n")

    return filename
Ejemplo n.º 29
0
    def __init__(self, kw):

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.quick = False

        self.uc_id = uuid.uuid4()
        print("uc_id:", self.uc_id)

        if 'q' in kw:
            self.results = {}
            self.quick = True
            self.search_terms = kw['q']
            print("self.search_terms is: ", self.search_terms)
            self.trait_type = kw['trait_type']
            self.quick_search()
        else:
            print("kw is:", kw)
            if kw['search_terms_or']:
                self.and_or = "or"
                self.search_terms = kw['search_terms_or']
            else:
                self.and_or = "and"
                self.search_terms = kw['search_terms_and']
            self.search_term_exists = True
            self.results = []
            if kw['type'] == "Phenotypes":
                dataset_type = "Publish"
            elif kw['type'] == "Genotypes":
                dataset_type = "Geno"
            else:
                dataset_type = "ProbeSet"
            self.dataset = create_dataset(kw['dataset'], dataset_type)
            print("KEYWORD:", self.search_terms)
            self.search()
            if self.search_term_exists:
                self.gen_search_result()
Ejemplo n.º 30
0
    def __init__(self, kw):

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.quick = False

        self.uc_id = uuid.uuid4()
        print("uc_id:", self.uc_id)

        if 'q' in kw:
            self.results = {}
            self.quick = True
            self.search_terms = kw['q']
            print("self.search_terms is: ", self.search_terms)
            self.trait_type = kw['trait_type']
            self.quick_search()
        else:
            print("kw is:", kw)
            if kw['search_terms_or']:
                self.and_or = "or"
                self.search_terms = kw['search_terms_or']
            else:
                self.and_or = "and"
                self.search_terms = kw['search_terms_and']
            self.search_term_exists = True
            self.results = []
            if kw['type'] == "Phenotypes":
                dataset_type = "Publish"
            elif kw['type'] == "Genotypes":
                dataset_type = "Geno"
            else:
                dataset_type = "ProbeSet"
            self.dataset = create_dataset(kw['dataset'], dataset_type)
            print("KEYWORD:", self.search_terms)
            self.search()
            if self.search_term_exists:
                self.gen_search_result()
Ejemplo n.º 31
0
    def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
        # xor assertion
        assert bool(kw.get('dataset')) != bool(
            kw.get('dataset_name')), "Needs dataset ob. or name"
        if kw.get('dataset_name'):
            self.dataset = create_dataset(kw.get('dataset_name'))
            #print(" in GeneralTrait created dataset:", self.dataset)
        else:
            self.dataset = kw.get('dataset')
        self.name = kw.get('name')  # Trait ID, ProbeSet ID, Published ID, etc.
        #print("THE NAME IS:", self.name)
        self.cellid = kw.get('cellid')
        self.identification = kw.get('identification', 'un-named trait')
        self.haveinfo = kw.get('haveinfo', False)
        self.sequence = kw.get(
            'sequence')  # Blat sequence, available for ProbeSet
        self.data = kw.get('data', {})

        # Sets defaults
        self.locus = None
        self.lrs = None
        self.pvalue = None
        self.mean = None
        self.additive = None
        self.num_overlap = None
        self.strand_probe = None
        self.symbol = None

        self.LRS_score_repr = "N/A"
        self.LRS_score_value = 0
        self.LRS_location_repr = "N/A"
        self.LRS_location_value = 1000000

        if kw.get('fullname'):
            name2 = value.split("::")
            if len(name2) == 2:
                self.dataset, self.name = name2
                # self.cellid is set to None above
            elif len(name2) == 3:
                self.dataset, self.name, self.cellid = name2

        # Todo: These two lines are necessary most of the time, but perhaps not all of the time
        # So we could add a simple if statement to short-circuit this if necessary
        if self.dataset.type != "Temp":
            self = retrieve_trait_info(self,
                                       self.dataset,
                                       get_qtl_info=get_qtl_info)
            if get_sample_info != False:
                self = retrieve_sample_data(self, self.dataset)
Ejemplo n.º 32
0
    def run(dataset_name, vals, temp_uuid):
        """Generates p-values for each marker"""

        tempdata = temp_data.TempData(temp_uuid)
        
        dataset = data_set.create_dataset(dataset_name)
        species = TheSpecies(dataset=dataset)

        samples = [] # Want only ones with values
        vals = vals

        for sample in dataset.group.samplelist:
            samples.append(str(sample))
            
        gen_data(dataset, vals, tempdata)
Ejemplo n.º 33
0
    def calculate_pca(self, cols, corr_eigen_value, corr_eigen_vectors):
        base = importr('base')
        stats = importr('stats')

        corr_results_to_list = robjects.FloatVector(
            [item for sublist in self.pca_corr_results for item in sublist])

        m = robjects.r.matrix(corr_results_to_list, nrow=len(cols))
        eigen = base.eigen(m)
        pca = stats.princomp(m, cor="TRUE")
        self.loadings = pca.rx('loadings')
        self.scores = pca.rx('scores')
        self.scale = pca.rx('scale')

        trait_array = zScore(self.trait_data_array)
        trait_array_vectors = np.dot(corr_eigen_vectors, trait_array)

        pca_traits = []
        for i, vector in enumerate(trait_array_vectors):
            #ZS: Check if below check is necessary
            #if corr_eigen_value[i-1] > 100.0/len(self.trait_list):
            pca_traits.append((vector * -1.0).tolist())

        this_group_name = self.trait_list[0][1].group.name
        temp_dataset = data_set.create_dataset(dataset_name="Temp",
                                               dataset_type="Temp",
                                               group_name=this_group_name)
        temp_dataset.group.get_samplelist()
        for i, pca_trait in enumerate(pca_traits):
            trait_id = "PCA" + str(
                i + 1
            ) + "_" + temp_dataset.group.species + "_" + this_group_name + "_" + datetime.datetime.now(
            ).strftime("%m%d%H%M%S")
            this_vals_string = ""
            position = 0
            for sample in temp_dataset.group.all_samples_ordered():
                if sample in self.shared_samples_list:
                    this_vals_string += str(pca_trait[position])
                    this_vals_string += " "
                    position += 1
                else:
                    this_vals_string += "x "
            this_vals_string = this_vals_string[:-1]

            Redis.set(trait_id, this_vals_string, ex=THIRTY_DAYS)
            self.pca_trait_ids.append(trait_id)

        return pca
Ejemplo n.º 34
0
def jsonable(trait):
    """Return a dict suitable for using as json

    Actual turning into json doesn't happen here though"""

    dataset = create_dataset(dataset_name = trait.dataset.name, dataset_type = trait.dataset.type, group_name = trait.dataset.group.name)
    
    if dataset.type == "ProbeSet":
        return dict(name=trait.name,
                    symbol=trait.symbol,
                    dataset=dataset.name,
                    description=trait.description_display,
                    mean=trait.mean,
                    location=trait.location_repr,
                    lrs_score=trait.LRS_score_repr,
                    lrs_location=trait.LRS_location_repr,
                    additive=trait.additive
                    )
    elif dataset.type == "Publish":
        if trait.pubmed_id:
            return dict(name=trait.name,
                        dataset=dataset.name,
                        description=trait.description_display,
                        authors=trait.authors,
                        pubmed_text=trait.pubmed_text,
                        pubmed_link=trait.pubmed_link,
                        lrs_score=trait.LRS_score_repr,
                        lrs_location=trait.LRS_location_repr,
                        additive=trait.additive
                        )
        else:
            return dict(name=trait.name,
                        dataset=dataset.name,
                        description=trait.description_display,
                        authors=trait.authors,
                        pubmed_text=trait.pubmed_text,
                        lrs_score=trait.LRS_score_repr,
                        lrs_location=trait.LRS_location_repr,
                        additive=trait.additive
                        )
    elif dataset.type == "Geno":
        return dict(name=trait.name,
                    dataset=dataset.name,
                    location=trait.location_repr
                    )
    else:
        return dict()
Ejemplo n.º 35
0
def getSequence(trait, dataset_name):
    dataset = create_dataset(dataset_name)
     
    if dataset.type == 'ProbeSet':
        results = g.db.execute('''
                       SELECT
                               ProbeSet.BlatSeq
                       FROM
                               ProbeSet, ProbeSetFreeze, ProbeSetXRef
                       WHERE
                               ProbeSet.Id=ProbeSetXRef.ProbeSetId and
                               ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
                               ProbeSet.Name = %s
                               ProbeSetFreeze.Name = %s
               ''', trait.name, dataset.name).fetchone()

        return results[0]        
Ejemplo n.º 36
0
def add_cofactors(cross, this_dataset, covariates, samples):
    ro.numpy2ri.activate()

    covariate_list = covariates.split(",")
    covar_name_string = "c("
    for i, covariate in enumerate(covariate_list):
        this_covar_data = []
        covar_as_string = "c("
        trait_name = covariate.split(":")[0]
        dataset_ob = create_dataset(covariate.split(":")[1])
        trait_ob = GeneralTrait(dataset=dataset_ob,
                                name=trait_name,
                                cellid=None)

        this_dataset.group.get_samplelist()
        trait_samples = this_dataset.group.samplelist
        trait_sample_data = trait_ob.data
        for index, sample in enumerate(trait_samples):
            if sample in samples:
                if sample in trait_sample_data:
                    sample_value = trait_sample_data[sample].value
                    this_covar_data.append(sample_value)
                else:
                    this_covar_data.append("NA")

        for j, item in enumerate(this_covar_data):
            if j < (len(this_covar_data) - 1):
                covar_as_string += str(item) + ","
            else:
                covar_as_string += str(item)

        covar_as_string += ")"

        col_name = "covar_" + str(i)
        cross = add_phenotype(cross, covar_as_string, col_name)

        if i < (len(covariate_list) - 1):
            covar_name_string += '"' + col_name + '", '
        else:
            covar_name_string += '"' + col_name + '"'

    covar_name_string += ")"

    covars_ob = pull_var("trait_covars", cross, covar_name_string)

    return cross, covars_ob
Ejemplo n.º 37
0
    def __init__(self, kw):
        """This class gets invoked after hitting submit on the main menu (in
views.py).

        """

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.uc_id = uuid.uuid4()
        logger.debug("uc_id:", self.uc_id) # contains a unique id

        logger.debug("kw is:", kw)         # dict containing search terms
        if kw['search_terms_or']:
            self.and_or = "or"
            self.search_terms = kw['search_terms_or']
        else:
            self.and_or = "and"
            self.search_terms = kw['search_terms_and']
        self.search_term_exists = True
        self.results = []
        if kw['type'] == "Phenotypes":     # split datatype on type field
            dataset_type = "Publish"
        elif kw['type'] == "Genotypes":
            dataset_type = "Geno"
        else:
            dataset_type = "ProbeSet"      # ProbeSet is default
        self.dataset = create_dataset(kw['dataset'], dataset_type)
        logger.debug("search_terms:", self.search_terms)
        self.search()
        if self.search_term_exists:
            self.gen_search_result()
    def __init__(self, kw):
        """This class gets invoked after hitting submit on the main menu (in
views.py).

        """

        ###########################################
        #   Names and IDs of group / F2 set
        ###########################################

        # All Phenotypes is a special case we'll deal with later
        #if kw['dataset'] == "All Phenotypes":
        #    self.cursor.execute("""
        #        select PublishFreeze.Name, InbredSet.Name, InbredSet.Id from PublishFreeze,
        #        InbredSet where PublishFreeze.Name not like 'BXD300%' and InbredSet.Id =
        #        PublishFreeze.InbredSetId""")
        #    results = self.cursor.fetchall()
        #    self.dataset = map(lambda x: DataSet(x[0], self.cursor), results)
        #    self.dataset_groups = map(lambda x: x[1], results)
        #    self.dataset_group_ids = map(lambda x: x[2], results)
        #else:

        self.uc_id = uuid.uuid4()
        logger.debug("uc_id:", self.uc_id)  # contains a unique id

        logger.debug("kw is:", kw)  # dict containing search terms
        if kw['search_terms_or']:
            self.and_or = "or"
            self.search_terms = kw['search_terms_or']
        else:
            self.and_or = "and"
            self.search_terms = kw['search_terms_and']
        self.search_term_exists = True
        self.results = []
        if kw['type'] == "Phenotypes":  # split datatype on type field
            dataset_type = "Publish"
        elif kw['type'] == "Genotypes":
            dataset_type = "Geno"
        else:
            dataset_type = "ProbeSet"  # ProbeSet is default
        self.dataset = create_dataset(kw['dataset'], dataset_type)
        logger.debug("search_terms:", self.search_terms)
        self.search()
        if self.search_term_exists:
            self.gen_search_result()
Ejemplo n.º 39
0
    def __init__(self, get_qtl_info=False, get_sample_info=True, **kw):
        # xor assertion
        assert bool(kw.get('dataset')) != bool(kw.get('dataset_name')), "Needs dataset ob. or name";
        if kw.get('dataset_name'):
            self.dataset = create_dataset(kw.get('dataset_name'))
            #print(" in GeneralTrait created dataset:", self.dataset)
        else:
            self.dataset = kw.get('dataset')
        self.name = kw.get('name')                 # Trait ID, ProbeSet ID, Published ID, etc.
        #print("THE NAME IS:", self.name)
        self.cellid = kw.get('cellid')
        self.identification = kw.get('identification', 'un-named trait')
        self.haveinfo = kw.get('haveinfo', False)
        self.sequence = kw.get('sequence')         # Blat sequence, available for ProbeSet
        self.data = kw.get('data', {})

        # Sets defaults
        self.locus = None
        self.lrs = None
        self.pvalue = None
        self.mean = None
        self.additive = None
        self.num_overlap = None
        self.strand_probe = None
        self.symbol = None

        self.LRS_score_repr = "N/A"
        self.LRS_score_value = 0
        self.LRS_location_repr = "N/A"
        self.LRS_location_value = 1000000

        if kw.get('fullname'):
            name2 = value.split("::")
            if len(name2) == 2:
                self.dataset, self.name = name2
                # self.cellid is set to None above
            elif len(name2) == 3:
                self.dataset, self.name, self.cellid = name2

        # Todo: These two lines are necessary most of the time, but perhaps not all of the time
        # So we could add a simple if statement to short-circuit this if necessary
        if self.dataset.type != "Temp":
            self = retrieve_trait_info(self, self.dataset, get_qtl_info=get_qtl_info)
            if get_sample_info != False:
                self = retrieve_sample_data(self, self.dataset)
Ejemplo n.º 40
0
def get_export_metadata(trait_id, dataset_name):
    dataset = data_set.create_dataset(dataset_name)
    this_trait = create_trait(dataset=dataset,
                              name=trait_id,
                              cellid=None,
                              get_qtl_info=False)

    metadata = []
    if dataset.type == "Publish":
        metadata.append(["Phenotype ID: " + trait_id])
        metadata.append([
            "Phenotype URL: " + "http://genenetwork.org/show_trait?trait_id=" +
            trait_id + "&dataset=" + dataset_name
        ])
        metadata.append(["Group: " + dataset.group.name])
        metadata.append([
            "Phenotype: " +
            this_trait.description_display.replace(",", "\",\"")
        ])
        metadata.append([
            "Authors: " + (this_trait.authors if this_trait.authors else "N/A")
        ])
        metadata.append(
            ["Title: " + (this_trait.title if this_trait.title else "N/A")])
        metadata.append([
            "Journal: " + (this_trait.journal if this_trait.journal else "N/A")
        ])
        metadata.append([
            "Dataset Link: http://gn1.genenetwork.org/webqtl/main.py?FormID=sharinginfo&InfoPageName="
            + dataset.name
        ])
    else:
        metadata.append(["Record ID: " + trait_id])
        metadata.append([
            "Trait URL: " + "http://genenetwork.org/show_trait?trait_id=" +
            trait_id + "&dataset=" + dataset_name
        ])
        if this_trait.symbol:
            metadata.append(["Symbol: " + this_trait.symbol])
        metadata.append(["Dataset: " + dataset.name])
        metadata.append(["Group: " + dataset.group.name])

    metadata.append([])

    return metadata
Ejemplo n.º 41
0
def getSequence(trait, dataset_name):
    dataset = create_dataset(dataset_name)

    if dataset.type == 'ProbeSet':
        results = g.db.execute(
            '''
                       SELECT
                               ProbeSet.BlatSeq
                       FROM
                               ProbeSet, ProbeSetFreeze, ProbeSetXRef
                       WHERE
                               ProbeSet.Id=ProbeSetXRef.ProbeSetId and
                               ProbeSetFreeze.Id = ProbeSetXRef.ProbSetFreezeId and
                               ProbeSet.Name = %s
                               ProbeSetFreeze.Name = %s
               ''', trait.name, dataset.name).fetchone()

        return results[0]
Ejemplo n.º 42
0
def check_access_permissions():
    logger.debug("@app.before_request check_access_permissions")
    available = True
    if 'dataset' in request.args:
        permissions = DEFAULT_PRIVILEGES
        if request.args['dataset'] != "Temp":
            dataset = create_dataset(request.args['dataset'])

            if dataset.type == "Temp":
                permissions = DEFAULT_PRIVILEGES
            elif 'trait_id' in request.args:
                permissions = check_resource_availability(
                    dataset, request.args['trait_id'])
            elif dataset.type != "Publish":
                permissions = check_resource_availability(dataset)

        if 'view' not in permissions['data']:
            return redirect(url_for("no_access_page"))
Ejemplo n.º 43
0
    def run_analysis(self, requestform):
        print("Starting ePheWAS analysis on dataset")
        genofilelocation = locate(
            "BXD.geno", "genotype")  # Get the location of the BXD genotypes
        tissuealignerloc = locate(
            "Tissue_color_aligner.csv",
            "auwerx")  # Get the location of the Tissue_color_aligner

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)

        # Print some debug
        print "self.trait_id:" + self.trait_id + "\n"
        print "self.datasetname:" + self.datasetname + "\n"
        print "self.dataset.type:" + self.dataset.type + "\n"

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        snpinfo = []
        for marker in parser.markers:
            snpinfo.append(marker["name"])
            snpinfo.append(marker["chr"])
            snpinfo.append(marker["Mb"])

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo,
                                 nrow=len(parser.markers),
                                 dimnames=r_list(rnames,
                                                 r_c("SNP", "Chr", "Pos")),
                                 ncol=3,
                                 byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        print("Initialization of ePheWAS done !")
Ejemplo n.º 44
0
    def __init__(self, start_vars):
        # get trait list from db (database name)
        # calculate correlation with Base vector and targets

        # Check parameters
        assert('corr_type' in start_vars)
        assert(is_str(start_vars['corr_type']))
        assert('dataset' in start_vars)
        # assert('group' in start_vars) permitted to be empty?
        assert('corr_sample_method' in start_vars)
        assert('corr_samples_group' in start_vars)
        assert('corr_dataset' in start_vars)
        assert('min_expr' in start_vars)
        assert('corr_return_results' in start_vars)
        if 'loc_chr' in start_vars:
            assert('min_loc_mb' in start_vars)
            assert('max_loc_mb' in start_vars)

        with Bench("Doing correlations"):
            if start_vars['dataset'] == "Temp":
                self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = start_vars['group'])
                self.trait_id = "Temp"
                self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            else:
                helper_functions.get_species_dataset_trait(self, start_vars)

            self.dataset.group.read_genotype_file()

            corr_samples_group = start_vars['corr_samples_group']

            self.sample_data = {}
            self.corr_type = start_vars['corr_type']
            self.corr_method = start_vars['corr_sample_method']
            self.min_expr = get_float(start_vars,'min_expr')
            self.p_range_lower = get_float(start_vars,'p_range_lower',-1.0)
            self.p_range_upper = get_float(start_vars,'p_range_upper',1.0)

            if ('loc_chr' in start_vars and
                'min_loc_mb' in start_vars and
                'max_loc_mb' in start_vars):

                self.location_chr = get_string(start_vars,'loc_chr')
                self.min_location_mb = get_int(start_vars,'min_loc_mb')
                self.max_location_mb = get_int(start_vars,'max_loc_mb')

            self.get_formatted_corr_type()
            self.return_number = int(start_vars['corr_return_results'])

            #The two if statements below append samples to the sample list based upon whether the user
            #rselected Primary Samples Only, Other Samples Only, or All Samples

            primary_samples = self.dataset.group.samplelist
            if self.dataset.group.parlist != None:
                primary_samples += self.dataset.group.parlist
            if self.dataset.group.f1list != None:
                primary_samples += self.dataset.group.f1list

            #If either BXD/whatever Only or All Samples, append all of that group's samplelist
            if corr_samples_group != 'samples_other':
                self.process_samples(start_vars, primary_samples)

            #If either Non-BXD/whatever or All Samples, get all samples from this_trait.data and
            #exclude the primary samples (because they would have been added in the previous
            #if statement if the user selected All Samples)
            if corr_samples_group != 'samples_primary':
                if corr_samples_group == 'samples_other':
                    primary_samples = [x for x in primary_samples if x not in (
                                    self.dataset.group.parlist + self.dataset.group.f1list)]
                self.process_samples(start_vars, self.this_trait.data.keys(), primary_samples)

            self.target_dataset = data_set.create_dataset(start_vars['corr_dataset'])
            self.target_dataset.get_trait_data(self.sample_data.keys())

            self.correlation_results = []

            self.correlation_data = {}

            if self.corr_type == "tissue":
                self.trait_symbol_dict = self.dataset.retrieve_genes("Symbol")

                tissue_corr_data = self.do_tissue_correlation_for_all_traits()
                if tissue_corr_data != None:
                    for trait in tissue_corr_data.keys()[:self.return_number]:
                        self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])
                else:
                    for trait, values in self.target_dataset.trait_data.iteritems():
                        self.get_sample_r_and_p_values(trait, values)

            elif self.corr_type == "lit":
                self.trait_geneid_dict = self.dataset.retrieve_genes("GeneId")
                lit_corr_data = self.do_lit_correlation_for_all_traits()

                for trait in lit_corr_data.keys()[:self.return_number]:
                    self.get_sample_r_and_p_values(trait, self.target_dataset.trait_data[trait])

            elif self.corr_type == "sample":
                for trait, values in self.target_dataset.trait_data.iteritems():
                    self.get_sample_r_and_p_values(trait, values)

            self.correlation_data = collections.OrderedDict(sorted(self.correlation_data.items(),
                                                                   key=lambda t: -abs(t[1][0])))


            if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                #ZS: Convert min/max chromosome to an int for the location range option
                range_chr_as_int = None
                for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems():
                    if chr_info.name == self.location_chr:
                        range_chr_as_int = order_id

            for _trait_counter, trait in enumerate(self.correlation_data.keys()[:self.return_number]):
                trait_object = GeneralTrait(dataset=self.target_dataset, name=trait, get_qtl_info=True, get_sample_info=False)

                if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":
                    #ZS: Convert trait chromosome to an int for the location range option
                    chr_as_int = 0
                    for order_id, chr_info in self.dataset.species.chromosomes.chromosomes.iteritems():
                        if chr_info.name == trait_object.chr:
                            chr_as_int = order_id

                if (float(self.correlation_data[trait][0]) >= self.p_range_lower and
                    float(self.correlation_data[trait][0]) <= self.p_range_upper):

                    if self.target_dataset.type == "ProbeSet" or self.target_dataset.type == "Geno":

                        if (self.min_expr != None) and (float(trait_object.mean) < self.min_expr):
                            continue
                        elif range_chr_as_int != None and (chr_as_int != range_chr_as_int):
                            continue
                        elif (self.min_location_mb != None) and (float(trait_object.mb) < float(self.min_location_mb)):
                            continue
                        elif (self.max_location_mb != None) and (float(trait_object.mb) > float(self.max_location_mb)):
                            continue

                        (trait_object.sample_r,
                        trait_object.sample_p,
                        trait_object.num_overlap) = self.correlation_data[trait]

                        # Set some sane defaults
                        trait_object.tissue_corr = 0
                        trait_object.tissue_pvalue = 0
                        trait_object.lit_corr = 0
                        if self.corr_type == "tissue" and tissue_corr_data != None:
                            trait_object.tissue_corr = tissue_corr_data[trait][1]
                            trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                        elif self.corr_type == "lit":
                            trait_object.lit_corr = lit_corr_data[trait][1]
                        self.correlation_results.append(trait_object)
                    else:
                        (trait_object.sample_r,
                        trait_object.sample_p,
                        trait_object.num_overlap) = self.correlation_data[trait]

                        # Set some sane defaults
                        trait_object.tissue_corr = 0
                        trait_object.tissue_pvalue = 0
                        trait_object.lit_corr = 0
                        if self.corr_type == "tissue":
                            trait_object.tissue_corr = tissue_corr_data[trait][1]
                            trait_object.tissue_pvalue = tissue_corr_data[trait][2]
                        elif self.corr_type == "lit":
                            trait_object.lit_corr = lit_corr_data[trait][1]
                        self.correlation_results.append(trait_object)

            self.target_dataset.get_trait_info(self.correlation_results, self.target_dataset.group.species)

            if self.corr_type != "lit" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_lit_correlation_for_trait_list()

            if self.corr_type != "tissue" and self.dataset.type == "ProbeSet" and self.target_dataset.type == "ProbeSet":
                self.do_tissue_correlation_for_trait_list()

        self.json_results = generate_corr_json(self.correlation_results, self.this_trait, self.dataset, self.target_dataset)
Ejemplo n.º 45
0
    def run_analysis(self, requestform):
        logger.info("Starting PheWAS analysis on dataset")
        genofilelocation = locate("BXD.geno", "genotype")                                  # Get the location of the BXD genotypes
        precompfile = locate_phewas("PheWAS_pval_EMMA_norm.RData", "auwerx")              # Get the location of the pre-computed EMMA results

        # Get user parameters, trait_id and dataset, and store/update them in self
        self.trait_id = requestform["trait_id"]
        self.datasetname = requestform["dataset"]
        self.dataset = data_set.create_dataset(self.datasetname)
        self.region = int(requestform["num_region"])
        self.mtadjust = str(requestform["sel_mtadjust"])

        # Logger.Info some debug
        logger.info("self.trait_id:" + self.trait_id + "\n")
        logger.info("self.datasetname:" + self.datasetname + "\n")
        logger.info("self.dataset.type:" + self.dataset.type + "\n")

        # GN Magic ?
        self.this_trait = GeneralTrait(dataset=self.dataset, name = self.trait_id, get_qtl_info = False, get_sample_info=False)
        logger.info(vars(self.this_trait))

        # Set the values we need
        self.chr = str(self.this_trait.chr);
        self.mb = int(self.this_trait.mb);

        # logger.info some debug
        logger.info("location:" + self.chr + ":" + str(self.mb) + "+/-" + str(self.region) + "\n")

        # Load in the genotypes file *sigh* to make the markermap
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        snpinfo = []
        for marker in parser.markers:
          snpinfo.append(marker["name"]);
          snpinfo.append(marker["chr"]);
          snpinfo.append(marker["Mb"]);

        rnames = r_seq(1, len(parser.markers))
        # Create the snp aligner object out of the BXD genotypes
        snpaligner = ro.r.matrix(snpinfo, nrow=len(parser.markers), dimnames = r_list(rnames, r_c("SNP", "Chr", "Pos")), ncol = 3, byrow=True)

        # Create the phenotype aligner object using R
        phenoaligner = self.r_create_Pheno_aligner()

        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("phewas_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']
        self.results['mtadjust'] = self.mtadjust
        logger.info("IMAGE AT:", self.results['imgurl1'] )
        logger.info("IMAGE AT:", self.results['imgloc1'] )
        # Create the PheWAS plot (The gene/probe name, chromosome and gene/probe positions should come from the user input)
        # TODO: generate the PDF in the temp folder, with a unique name
        assert(precompfile)
        assert(phenoaligner)
        assert(snpaligner)
        phewasres = self.r_PheWASManhattan("Test", precompfile, phenoaligner, snpaligner, "None", self.chr, self.mb, self.region, self.results['imgloc1'] , self.mtadjust)
        self.results['phewas1'] = phewasres[0]
        self.results['phewas2'] = phewasres[1]
        self.results['tabulardata'] = phewasres[2]
        self.results['R_debuglog'] = phewasres[3]

        #self.r_PheWASManhattan(allpvalues)
        #self.r_Stop()

        logger.info("Initialization of PheWAS done !")
Ejemplo n.º 46
0
    def __init__(self, params):
        self.data_set_1 = data_set.create_dataset(params['dataset_1'])
        self.data_set_2 = data_set.create_dataset(params['dataset_2'])
        self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
        self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)

        try:
            width = int(params['width'])
        except:
            width = 800
        self.width = width

        try:
            height = int(params['height'])
        except:
            height = 600
        self.height = height

        try:
            circle_color = params['circle_color']
        except:
            circle_color = '#3D85C6'
        self.circle_color = circle_color

        try:
            circle_radius = int(params['circle_radius'])
        except:
            circle_radius = 5
        self.circle_radius = circle_radius

        try:
            line_color = params['line_color']
        except:
            line_color = '#FF0000'
        self.line_color = line_color

        try:
            line_width = int(params['line_width'])
        except:
            line_width = 1
        self.line_width = line_width

        samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)

        self.data = []
        self.indIDs = samples_1.keys()
        vals_1 = []
        for sample in samples_1.keys():
            vals_1.append(samples_1[sample].value)
        self.data.append(vals_1)
        vals_2 = []
        for sample in samples_2.keys():
            vals_2.append(samples_2[sample].value)
        self.data.append(vals_2)

        x = np.array(vals_1)
        y = np.array(vals_2)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        
        rx = stats.rankdata(x)
        ry = stats.rankdata(y)        
        self.rdata = []
        self.rdata.append(rx.tolist())
        self.rdata.append(ry.tolist())        
        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)

        self.js_data = dict(
            data = self.data,
            rdata = self.rdata,
            indIDs = self.indIDs,
            trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name),
            trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name),
            samples_1 = samples_1,
            samples_2 = samples_2,
            num_overlap = num_overlap,
            vals_1 = vals_1,
            vals_2 = vals_2,
            
            slope = slope,
            intercept = intercept,
            r_value = r_value,
            p_value = p_value,
            
            srslope = srslope,
            srintercept = srintercept,
            srr_value = srr_value,
            srp_value = srp_value,
            
            width = width,
            height = height,
            circle_color = circle_color,
            circle_radius = circle_radius,
            line_color = line_color,
            line_width = line_width
        )
        self.jsdata = self.js_data
    def __init__(self, kw):
        self.type = kw['type']
        self.terms = kw['terms']
        #self.row_range = kw['row_range']
        if self.type == "gene":
            sql = """
                SELECT
                Species.`Name` AS species_name,
                InbredSet.`Name` AS inbredset_name,
                Tissue.`Name` AS tissue_name,
                ProbeSetFreeze.Name AS probesetfreeze_name,
                ProbeSet.Name AS probeset_name,
                ProbeSet.Symbol AS probeset_symbol,
                ProbeSet.`description` AS probeset_description,
                ProbeSet.Chr AS chr,
                ProbeSet.Mb AS mb,
                ProbeSetXRef.Mean AS mean,
                ProbeSetXRef.LRS AS lrs,
                ProbeSetXRef.`Locus` AS locus,
                ProbeSetXRef.`pValue` AS pvalue,
                ProbeSetXRef.`additive` AS additive
                FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
                WHERE InbredSet.`SpeciesId`=Species.`Id`
                AND ProbeFreeze.InbredSetId=InbredSet.`Id`
                AND ProbeFreeze.`TissueId`=Tissue.`Id`
                AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
                AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
                AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
                AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
                AND ProbeSetFreeze.public > 0
                ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
                LIMIT 6000
                """ % (self.terms)
            with Bench("Running query"):
                logger.sql(sql)
                re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[3], "ProbeSet", get_samplelist=False)
                    trait_id = line[4]
                    #with Bench("Building trait object"):
                    this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
                    self.trait_list.append(this_trait)

        elif self.type == "phenotype":
            sql = """
                SELECT
                Species.`Name`,
                InbredSet.`Name`,
                PublishFreeze.`Name`,
                PublishXRef.`Id`,
                Phenotype.`Post_publication_description`,
                Publication.`Authors`,
                Publication.`Year`,
                PublishXRef.`LRS`,
                PublishXRef.`Locus`,
                PublishXRef.`additive`
                FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
                WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
                AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
                AND InbredSet.`SpeciesId`=Species.`Id`
                AND PublishXRef.`PhenotypeId`=Phenotype.`Id`
                AND PublishXRef.`PublicationId`=Publication.`Id`
                AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]"
                    OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]"
                    OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]"
                    OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
                ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
                LIMIT 6000
                """ % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms)
            logger.sql(sql)
            re = g.db.execute(sql).fetchall()
            self.trait_list = []
            with Bench("Creating trait objects"):
                for line in re:
                    dataset = create_dataset(line[2], "Publish")
                    trait_id = line[3]
                    this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True, get_sample_info=False)
                    self.trait_list.append(this_trait)

        self.results = self.convert_to_json()
Ejemplo n.º 48
0
    def __init__(self, params):
        self.data_set_1 = data_set.create_dataset(params['dataset_1'])
        self.data_set_2 = data_set.create_dataset(params['dataset_2'])
        #self.data_set_3 = data_set.create_dataset(params['dataset_3'])
        self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
        self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)
        #self.trait_3 = GeneralTrait(name=params['trait_3'], dataset=self.data_set_3)

        samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)

        self.data = []
        self.indIDs = samples_1.keys()
        vals_1 = []
        for sample in samples_1.keys():
            vals_1.append(samples_1[sample].value)
        self.data.append(vals_1)
        vals_2 = []
        for sample in samples_2.keys():
            vals_2.append(samples_2[sample].value)
        self.data.append(vals_2)

        x = np.array(vals_1)
        y = np.array(vals_2)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        
        rx = stats.rankdata(x)
        ry = stats.rankdata(y)        
        self.rdata = []
        self.rdata.append(rx.tolist())
        self.rdata.append(ry.tolist())        
        srslope, srintercept, srr_value, srp_value, srstd_err = stats.linregress(rx, ry)

        #vals_3 = []
        #for sample in self.trait_3.data:
        #    vals_3.append(self.trait_3.data[sample].value)

        self.js_data = dict(
            data = self.data,
            rdata = self.rdata,
            indIDs = self.indIDs,
            trait_1 = self.trait_1.dataset.name + ": " + str(self.trait_1.name),
            trait_2 = self.trait_2.dataset.name + ": " + str(self.trait_2.name),
            samples_1 = samples_1,
            samples_2 = samples_2,
            num_overlap = num_overlap,
            vals_1 = vals_1,
            vals_2 = vals_2,

            slope = slope,
            intercept = intercept,
            r_value = r_value,
            p_value = p_value,

            srslope = srslope,
            srintercept = srintercept,
            srr_value = srr_value,
            srp_value = srp_value

            #trait3 = self.trait_3.data,
            #vals_3 = vals_3
        )
        self.jsdata = self.js_data
Ejemplo n.º 49
0
	def __init__(self, kw):
		self.type = kw['type']
		self.terms = kw['terms']
		if self.type == "gene":
			sql = """
				SELECT
				Species.`Name` AS species_name,
				InbredSet.`Name` AS inbredset_name,
				Tissue.`Name` AS tissue_name,
				ProbeSetFreeze.Name AS probesetfreeze_name,
				ProbeSet.Name AS probeset_name,
				ProbeSet.Symbol AS probeset_symbol,
				ProbeSet.`description` AS probeset_description,
				ProbeSet.Chr AS chr,
				ProbeSet.Mb AS mb,
				ProbeSetXRef.Mean AS mean,
				ProbeSetXRef.LRS AS lrs,
				ProbeSetXRef.`Locus` AS locus,
				ProbeSetXRef.`pValue` AS pvalue,
				ProbeSetXRef.`additive` AS additive
				FROM Species, InbredSet, ProbeSetXRef, ProbeSet, ProbeFreeze, ProbeSetFreeze, Tissue
				WHERE InbredSet.`SpeciesId`=Species.`Id`
				AND ProbeFreeze.InbredSetId=InbredSet.`Id`
				AND ProbeFreeze.`TissueId`=Tissue.`Id`
				AND ProbeSetFreeze.ProbeFreezeId=ProbeFreeze.Id
				AND ( MATCH (ProbeSet.Name,ProbeSet.description,ProbeSet.symbol,alias,GenbankId, UniGeneId, Probe_Target_Description) AGAINST ('%s' IN BOOLEAN MODE) )
				AND ProbeSet.Id = ProbeSetXRef.ProbeSetId
				AND ProbeSetXRef.ProbeSetFreezeId=ProbeSetFreeze.Id
				AND ProbeSetFreeze.public > 0
				ORDER BY species_name, inbredset_name, tissue_name, probesetfreeze_name, probeset_name
				LIMIT 5000
				""" % (self.terms)
			re = g.db.execute(sql).fetchall()
			self.trait_list = []
			for line in re:
				dataset = create_dataset(line[3], "ProbeSet")
				trait_id = line[4]
				this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True)
				self.trait_list.append(this_trait)
				species = webqtlDatabaseFunction.retrieve_species(dataset.group.name)
				dataset.get_trait_info([this_trait], species)

		elif self.type == "phenotype":
			sql = """
				SELECT
				Species.`Name`,
				InbredSet.`Name`,
				PublishFreeze.`Name`,
				PublishXRef.`Id`,
				Phenotype.`Post_publication_description`,
				Publication.`Authors`,
				Publication.`Year`,
				PublishXRef.`LRS`,
				PublishXRef.`Locus`,
				PublishXRef.`additive`
				FROM Species,InbredSet,PublishFreeze,PublishXRef,Phenotype,Publication
				WHERE PublishXRef.`InbredSetId`=InbredSet.`Id`
				AND PublishFreeze.`InbredSetId`=InbredSet.`Id`
				AND InbredSet.`SpeciesId`=Species.`Id`
				AND PublishXRef.`PhenotypeId`=Phenotype.`Id` 
				AND PublishXRef.`PublicationId`=Publication.`Id`
				AND	  (Phenotype.Post_publication_description REGEXP "[[:<:]]%s[[:>:]]" 
					OR Phenotype.Pre_publication_description REGEXP "[[:<:]]%s[[:>:]]" 
					OR Phenotype.Pre_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" 
					OR Phenotype.Post_publication_abbreviation REGEXP "[[:<:]]%s[[:>:]]" 
					OR Phenotype.Lab_code REGEXP "[[:<:]]%s[[:>:]]" 
					OR Publication.PubMed_ID REGEXP "[[:<:]]%s[[:>:]]" 
					OR Publication.Abstract REGEXP "[[:<:]]%s[[:>:]]" 
					OR Publication.Title REGEXP "[[:<:]]%s[[:>:]]" 
					OR Publication.Authors REGEXP "[[:<:]]%s[[:>:]]" 
					OR PublishXRef.Id REGEXP "[[:<:]]%s[[:>:]]")
				ORDER BY Species.`Name`, InbredSet.`Name`, PublishXRef.`Id`
				LIMIT 5000
				""" % (self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms, self.terms)
			re = g.db.execute(sql).fetchall()
			self.trait_list = []
			for line in re:
				dataset = create_dataset(line[2], "Publish")
				trait_id = line[3]
				this_trait = GeneralTrait(dataset=dataset, name=trait_id, get_qtl_info=True)
				self.trait_list.append(this_trait)
				species = webqtlDatabaseFunction.retrieve_species(dataset.group.name)
				dataset.get_trait_info([this_trait], species)
Ejemplo n.º 50
0
    def __init__(self, kw):
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #self.dataset.group.read_genotype_file()

        #if this_trait:
        #    if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
        #            self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
        #                                this_trait.mysqlid)
        #            heritability = self.cursor.fetchone()

        #ZS: Get verify/rna-seq link URLs
        try:
            blatsequence = self.this_trait.blatseq
            if not blatsequence:
                #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
                query1 = """SELECT Probe.Sequence, Probe.Name
                           FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                           WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                                 ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                                 ProbeSetFreeze.Name = '%s' AND
                                 ProbeSet.Name = '%s' AND
                                 Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name)
                seqs = g.db.execute(query1).fetchall()
                if not seqs:
                    raise ValueError
                else:
                    blatsequence = ''
                    for seqt in seqs:
                        if int(seqt[1][-1]) % 2 == 1:
                            blatsequence += string.strip(seqt[0])

            #--------Hongqiang add this part in order to not only blat ProbeSet, but also blat Probe
            blatsequence = '%3E' + self.this_trait.name + '%0A' + blatsequence + '%0A'
            #XZ, 06/03/2009: ProbeSet name is not unique among platforms. We should use ProbeSet Id instead.
            query2 = """SELECT Probe.Sequence, Probe.Name
                        FROM Probe, ProbeSet, ProbeSetFreeze, ProbeSetXRef
                        WHERE ProbeSetXRef.ProbeSetFreezeId = ProbeSetFreeze.Id AND
                              ProbeSetXRef.ProbeSetId = ProbeSet.Id AND
                              ProbeSetFreeze.Name = '%s' AND
                              ProbeSet.Name = '%s' AND
                              Probe.ProbeSetId = ProbeSet.Id order by Probe.SerialOrder""" % (self.this_trait.dataset.name, self.this_trait.name)

            seqs = g.db.execute(query2).fetchall()
            for seqt in seqs:
                if int(seqt[1][-1]) %2 == 1:
                    blatsequence += '%3EProbe_' + string.strip(seqt[1]) + '%0A' + string.strip(seqt[0]) + '%0A'

            if self.dataset.group.species == "rat":
                self.UCSC_BLAT_URL = webqtlConfig.UCSC_BLAT % ('rat', 'rn3', blatsequence)
                self.UTHSC_BLAT_URL = ""
            elif self.dataset.group.species == "mouse":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('mouse', 'mm10', blatsequence)
                self.UTHSC_BLAT_URL = webqtlConfig.UTHSC_BLAT % ('mouse', 'mm10', blatsequence)
            elif self.dataset.group.species == "human":
                self.UCSC_BLAT_URL = webqtlConfig.UTHSC_BLAT2 % ('human', 'hg19', blatsequence)
                self.UTHSC_BLAT_URL = ""
            else:
                self.UCSC_BLAT_URL = ""
                self.UTHSC_BLAT_URL = ""
        except:
            self.UCSC_BLAT_URL = ""
            self.UTHSC_BLAT_URL = ""

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
                #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
                #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
            else:
                self.nearest_marker = ""
                #self.nearest_marker1 = ""
                #self.nearest_marker2 = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ')

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
           hddn['temp_trait'] = True
           hddn['group'] = self.temp_group
           hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
                #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
            self.sample_group_types['samples_other'] = "Other"
            self.sample_group_types['samples_all'] = "All"
        else:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.stats_table_width, self.trait_table_width = get_table_widths(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol

        js_data = dict(trait_id = self.trait_id,
                       trait_symbol = trait_symbol,
                       dataset_type = self.dataset.type,
                       data_scale = self.dataset.data_scale,
                       sample_group_types = self.sample_group_types,
                       sample_lists = sample_lists,
                       attribute_names = self.sample_groups[0].attributes,
                       temp_uuid = self.temp_uuid)
        self.js_data = js_data
Ejemplo n.º 51
0
    def __init__(self, kw):
        logger.debug("in ShowTrait, kw are:", kw)

        if 'trait_id' in kw and kw['dataset'] != "Temp":
            self.temp_trait = False
            self.trait_id = kw['trait_id']
            helper_functions.get_species_dataset_trait(self, kw)
        elif 'group' in kw:
            self.temp_trait = True
            self.trait_id = "Temp_"+kw['species']+ "_" + kw['group'] + "_" + datetime.datetime.now().strftime("%m%d%H%M%S")
            self.temp_species = kw['species']
            self.temp_group = kw['group']
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = kw['trait_paste'].split()

            # Put values in Redis so they can be looked up later if added to a collection
            Redis.set(self.trait_id, kw['trait_paste'])
        else:
            self.temp_trait = True
            self.trait_id = kw['trait_id']
            self.temp_species = self.trait_id.split("_")[1]
            self.temp_group = self.trait_id.split("_")[2]
            self.dataset = data_set.create_dataset(dataset_name = "Temp", dataset_type = "Temp", group_name = self.temp_group)
            self.this_trait = GeneralTrait(dataset=self.dataset,
                                           name=self.trait_id,
                                           cellid=None)
            self.trait_vals = Redis.get(self.trait_id).split()

        #self.dataset.group.read_genotype_file()

        #if this_trait:
        #    if this_trait.dataset and this_trait.dataset.type and this_trait.dataset.type == 'ProbeSet':
        #            self.cursor.execute("SELECT h2 from ProbeSetXRef WHERE DataId = %d" %
        #                                this_trait.mysqlid)
        #            heritability = self.cursor.fetchone()

        self.build_correlation_tools()

        #Get nearest marker for composite mapping
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                self.nearest_marker = get_nearest_marker(self.this_trait, self.dataset)
                #self.nearest_marker1 = get_nearest_marker(self.this_trait, self.dataset)[0]
                #self.nearest_marker2 = get_nearest_marker(self.this_trait, self.dataset)[1]
            else:
                self.nearest_marker = ""
                #self.nearest_marker1 = ""
                #self.nearest_marker2 = ""

        self.make_sample_lists()

        # Todo: Add back in the ones we actually need from below, as we discover we need them
        hddn = OrderedDict()

        if self.dataset.group.allsamples:
            hddn['allsamples'] = string.join(self.dataset.group.allsamples, ' ')

        hddn['trait_id'] = self.trait_id
        hddn['dataset'] = self.dataset.name
        hddn['temp_trait'] = False
        if self.temp_trait:
           hddn['temp_trait'] = True
           hddn['group'] = self.temp_group
           hddn['species'] = self.temp_species
        hddn['use_outliers'] = False
        hddn['method'] = "pylmm"
        hddn['mapping_display_all'] = True
        hddn['suggestive'] = 0
        hddn['num_perm'] = 0
        hddn['manhattan_plot'] = ""
        hddn['control_marker'] = ""
        if not self.temp_trait:
            if hasattr(self.this_trait, 'locus_chr') and self.this_trait.locus_chr != "" and self.dataset.type != "Geno" and self.dataset.type != "Publish":
                hddn['control_marker'] = self.nearest_marker
                #hddn['control_marker'] = self.nearest_marker1+","+self.nearest_marker2
        hddn['do_control'] = False
        hddn['maf'] = 0.01
        hddn['compare_traits'] = []
        hddn['export_data'] = ""

        # We'll need access to this_trait and hddn in the Jinja2 Template, so we put it inside self
        self.hddn = hddn

        self.temp_uuid = uuid.uuid4()

        self.sample_group_types = OrderedDict()
        if len(self.sample_groups) > 1:
            self.sample_group_types['samples_primary'] = self.dataset.group.name + " Only"
            self.sample_group_types['samples_other'] = "Non-" + self.dataset.group.name
            self.sample_group_types['samples_all'] = "All Cases"
        else:
            self.sample_group_types['samples_primary'] = self.dataset.group.name
        sample_lists = [group.sample_list for group in self.sample_groups]

        self.get_mapping_methods()

        self.trait_table_width = get_trait_table_width(self.sample_groups)

        trait_symbol = None
        if not self.temp_trait:
            if self.this_trait.symbol:
                trait_symbol = self.this_trait.symbol

        js_data = dict(trait_id = self.trait_id,
                       trait_symbol = trait_symbol,
                       dataset_type = self.dataset.type,
                       data_scale = self.dataset.data_scale,
                       sample_group_types = self.sample_group_types,
                       sample_lists = sample_lists,
                       attribute_names = self.sample_groups[0].attributes,
                       temp_uuid = self.temp_uuid)
        self.js_data = js_data
Ejemplo n.º 52
0
    def run_analysis(self, requestform):
        print("Starting CTL analysis on dataset")
        self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
        self.trait_db_list = [x for x in self.trait_db_list if x]

        print("strategy:", requestform.get("strategy"))
        strategy = requestform.get("strategy")

        print("nperm:", requestform.get("nperm"))
        nperm = int(requestform.get("nperm"))

        print("parametric:", requestform.get("parametric"))
        parametric = bool(requestform.get("parametric"))

        print("significance:", requestform.get("significance"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()
        print(dataset.group)
        # Create a genotype matrix
        individuals = parser.individuals
        markers = []
        markernames = []
        for marker in parser.markers:
          markernames.append(marker["name"])
          markers.append(marker["genotypes"])

        genotypes = list(itertools.chain(*markers))
        print(len(genotypes) / len(individuals), "==", len(parser.markers))

        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))

        # Create a phenotype matrix
        traits = []
        for trait in self.trait_db_list:
          print("retrieving data for", trait)
          if trait != "":
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
            gt = TRAIT.retrieve_sample_data(gt, dataset, individuals)
            for ind in individuals:
              if ind in gt.data.keys():
                traits.append(gt.data[ind].value)
              else:
                traits.append("-999")

        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))

        print(rPheno)

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno, check_names = False)
        rGeno = r_data_frame(rGeno, check_names = False)

        # Debug: Print the genotype and phenotype files to disk
        #r_write_table(rGeno, "~/outputGN/geno.csv")
        #r_write_table(rPheno, "~/outputGN/pheno.csv")

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, ncores = 6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance = significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        self.results['requestform'] = requestform             # Store the user specified parameters for the output page

        # Create the lineplot
        r_png(self.results['imgloc1'], width=1000, height=600, type='cairo-png')
        self.r_lineplot(res, significance = significance)
        r_dev_off()

        n = 2                                                 # We start from 2, since R starts from 1 :)
        for trait in self.trait_db_list:
          # Create the QTL like CTL plots
          self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
          self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
          r_png(self.results['imgloc' + str(n)], width=1000, height=600, type='cairo-png')
          self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait)
          r_dev_off()
          n = n + 1

        # Flush any output from R
        sys.stdout.flush()

        # Create the interactive graph for cytoscape visualization (Nodes and Edges)
        print(type(significant))
        if not type(significant) == ri.RNULLType:
          for x in range(len(significant[0])):
            print(significant[0][x], significant[1][x], significant[2][x])            # Debug to console
            tsS = significant[0][x].split(':')                                        # Source
            tsT = significant[2][x].split(':')                                        # Target
            gtS = TRAIT.GeneralTrait(name = tsS[0], dataset_name = tsS[1])            # Retrieve Source info from the DB
            gtT = TRAIT.GeneralTrait(name = tsT[0], dataset_name = tsT[1])            # Retrieve Target info from the DB
            self.addNode(gtS)
            self.addNode(gtT)
            self.addEdge(gtS, gtT, significant, x)

            significant[0][x] = gtS.symbol + " (" + gtS.name + ")"                    # Update the trait name for the displayed table
            significant[2][x] = gtT.symbol + " (" + gtT.name + ")"                    # Update the trait name for the displayed table

        self.elements = json.dumps(self.nodes_list + self.edges_list)
Ejemplo n.º 53
0
    import sys

    from base import webqtlConfig
    from base.data_set import create_dataset
    from base.templatePage import templatePage
    from utility import webqtlUtil
    from dbFunction import webqtlDatabaseFunction

    db_conn = MySQLdb.Connect(db=webqtlConfig.DB_NAME,
                              host=webqtlConfig.MYSQL_SERVER,
                              user=webqtlConfig.DB_USER,
                              passwd=webqtlConfig.DB_PASSWD)
    cursor = db_conn.cursor()

    dataset_name = "HC_M2_0606_P"
    dataset = create_dataset(db_conn, dataset_name)

    #cursor.execute("""
    #            SELECT ProbeSet.Name as TNAME, 0 as thistable,
    #            ProbeSetXRef.Mean as TMEAN, ProbeSetXRef.LRS as TLRS,
    #            ProbeSetXRef.PVALUE as TPVALUE, ProbeSet.Chr_num as TCHR_NUM,
    #            ProbeSet.Mb as TMB, ProbeSet.Symbol as TSYMBOL,
    #            ProbeSet.name_num as TNAME_NUM
    #            FROM ProbeSetXRef, ProbeSet, Geno
    #            WHERE ProbeSetXRef.LRS > 99.0 and
    #            ABS(ProbeSet.Mb-Geno.Mb) < 5 and
    #            ProbeSetXRef.Locus = Geno.name and
    #            Geno.SpeciesId = 1 and
    #            ProbeSet.Chr = Geno.Chr and
    #            ProbeSet.Id = ProbeSetXRef.ProbeSetId and
    #            ProbeSetXRef.ProbeSetFreezeId = 112""")
 def __init__(self, params):
     self.data_set_1 = data_set.create_dataset(params['dataset_1'])
     self.data_set_2 = data_set.create_dataset(params['dataset_2'])
     self.trait_1 = GeneralTrait(name=params['trait_1'], dataset=self.data_set_1)
     self.trait_2 = GeneralTrait(name=params['trait_2'], dataset=self.data_set_2)
     
     try:
         width = int(params['width'])
     except:
         width = 800
     self.width = width
     
     try:
         height = int(params['height'])
     except:
         height = 600
     self.height = height
     
     try:
         circle_color = params['circle_color']
     except:
         circle_color = 'steelblue'
     self.circle_color = circle_color
     
     try:
         circle_radius = int(params['circle_radius'])
     except:
         circle_radius = 5
     self.circle_radius = circle_radius
     
     try:
         line_color = params['line_color']
     except:
         line_color = 'red'
     self.line_color = line_color
     
     try:
         line_width = int(params['line_width'])
     except:
         line_width = 1
     self.line_width = line_width
     
     samples_1, samples_2, num_overlap = corr_result_helpers.normalize_values_with_samples(self.trait_1.data, self.trait_2.data)
     vals_1 = []
     for sample in samples_1.keys():
         vals_1.append(samples_1[sample].value)
     vals_2 = []
     for sample in samples_2.keys():
         vals_2.append(samples_2[sample].value)
     x = np.array(vals_1)
     y = np.array(vals_2)
     slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
     self.js_data = dict(
         trait_1 = self.trait_1.dataset.name + ": " + self.trait_1.name,
         trait_2 = self.trait_2.dataset.name + ": " + self.trait_2.name,
         samples_1 = samples_1,
         samples_2 = samples_2,
         num_overlap = num_overlap,
         vals_1 = vals_1,
         vals_2 = vals_2,
         slope = slope,
         intercept = intercept,
         r_value = r_value,
         p_value = p_value,
         width = width,
         height = height,
         circle_color = circle_color,
         circle_radius = circle_radius,
         line_color = line_color,
         line_width = line_width
     )
Ejemplo n.º 55
0
    def run_analysis(self, requestform):
        print("Starting CTL analysis on dataset")
        self.trait_db_list = [trait.strip() for trait in requestform['trait_list'].split(',')]
        self.trait_db_list = [x for x in self.trait_db_list if x]

        print("strategy:", requestform.get("strategy"))
        strategy = requestform.get("strategy")

        print("nperm:", requestform.get("nperm"))
        nperm = int(requestform.get("nperm"))

        print("parametric:", requestform.get("parametric"))
        parametric = bool(requestform.get("parametric"))

        print("significance:", requestform.get("significance"))
        significance = float(requestform.get("significance"))

        # Get the name of the .geno file belonging to the first phenotype
        datasetname = self.trait_db_list[0].split(":")[1]
        dataset = data_set.create_dataset(datasetname)

        genofilelocation = locate(dataset.group.name + ".geno", "genotype")
        parser = genofile_parser.ConvertGenoFile(genofilelocation)
        parser.process_csv()

        # Create a genotype matrix
        individuals = parser.individuals
        markers = []
        markernames = []
        for marker in parser.markers:
          markernames.append(marker["name"])
          markers.append(marker["genotypes"])

        genotypes = list(itertools.chain(*markers))
        print(len(genotypes) / len(individuals), "==", len(parser.markers))

        rGeno = r_t(ro.r.matrix(r_unlist(genotypes), nrow=len(markernames), ncol=len(individuals), dimnames = r_list(markernames, individuals), byrow=True))

        # Create a phenotype matrix
        traits = []
        for trait in self.trait_db_list:
          print("retrieving data for", trait)
          if trait != "":
            ts = trait.split(':')
            gt = TRAIT.GeneralTrait(name = ts[0], dataset_name = ts[1])
            gt.retrieve_sample_data(individuals)
            for ind in individuals:
              if ind in gt.data.keys():
                traits.append(gt.data[ind].value)
              else:
                traits.append("-999")

        rPheno = r_t(ro.r.matrix(r_as_numeric(r_unlist(traits)), nrow=len(self.trait_db_list), ncol=len(individuals), dimnames = r_list(self.trait_db_list, individuals), byrow=True))

        # Use a data frame to store the objects
        rPheno = r_data_frame(rPheno)
        rGeno = r_data_frame(rGeno)

        # Debug: Print the genotype and phenotype files to disk
        #r_write_table(rGeno, "~/outputGN/geno.csv")
        #r_write_table(rPheno, "~/outputGN/pheno.csv")

        # Perform the CTL scan
        res = self.r_CTLscan(rGeno, rPheno, strategy = strategy, nperm = nperm, parametric = parametric, ncores = 6)

        # Get significant interactions
        significant = self.r_CTLsignificant(res, significance = significance)

        # Create an image for output
        self.results = {}
        self.results['imgurl1'] = webqtlUtil.genRandStr("CTLline_") + ".png"
        self.results['imgloc1'] = GENERATED_IMAGE_DIR + self.results['imgurl1']

        self.results['ctlresult'] = significant
        self.results['requestform'] = requestform             # Store the user specified parameters for the output page

        # Create the lineplot
        r_png(self.results['imgloc1'], width=1000, height=600)
        self.r_lineplot(res, significance = significance)
        r_dev_off()

        n = 2
        for trait in self.trait_db_list:
          # Create the QTL like CTL plots
          self.results['imgurl' + str(n)] = webqtlUtil.genRandStr("CTL_") + ".png"
          self.results['imgloc' + str(n)] = GENERATED_IMAGE_DIR + self.results['imgurl' + str(n)]
          r_png(self.results['imgloc' + str(n)], width=1000, height=600)
          self.r_plotCTLobject(res, (n-1), significance = significance, main='Phenotype ' + trait)
          r_dev_off()
          n = n + 1

        # Flush any output from R
        sys.stdout.flush()