Beispiel #1
0
def gene_network2ws(meth, feature_set_id=None, out_id=None):
    """ Query all available network data in KBase central store.

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the field list above (including the non-Sphinx
    # ``:output_widget:`` entry) presumably is read by the hosting framework,
    # so the docstring text is left exactly as it was -- confirm.
    # NOTE(review): stages is set to 3 while advance() is called four times.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    workspace = Workspace2(token=meth.token, wsid=meth.workspace_id)
    feature_set = workspace.get(feature_set_id)
    # Map the FeatureSet element ids through ids2cds, then collapse duplicate
    # mapped values; the set's iteration order decides the order in the string.
    cds_by_query_id = ids2cds(feature_set['elements'].keys())
    unique_cds = set(cds_by_query_id.values())
    gene_list_csv = ",".join(unique_cds)

    meth.advance("Running GeneList to Networks")
    request = {
        "ws_id": meth.workspace_id,
        "inobj_id": gene_list_csv,
        "outobj_id": out_id,
    }
    try:
        # The value returned by the service is not used; only failures matter.
        gwas_client.genelist_to_networks(request)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    meth.advance("Returning object")
    return _workspace_output(out_id)
Beispiel #2
0
def gwas_run_gwas2(meth, genotype=None, kinship_matrix=None, traits=None, out=None):
    """Computes association between each SNP and a trait of interest that has been scored
    across a large number of individuals. This method takes Filtered SNP object,
    kinship matrix, trait object as input and computes association.

   :param genotype: Population variation object
   :type genotype: kbtypes.KBaseGwasData.GwasPopulationVariation
   :param kinship_matrix: Kinship matrix object id
   :type kinship_matrix: kbtypes.KBaseGwasData.GwasPopulationKinship
   :param traits: Trait object id
   :type traits: kbtypes.KBaseGwasData.GwasPopulationTrait
   :param out: Output
   :type out: kbtypes.KBaseGwasData.GwasTopVariations
   :return: New workspace object
   :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring field list presumably is read by the hosting
    # framework, so its text (odd indentation included) is untouched -- confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    # "comment" is sent as a fixed placeholder; this method has no comment input.
    job_request = {
        "ws_id": meth.workspace_id,
        "variation_id": genotype,
        "trait_id": traits,
        "kinship_id": kinship_matrix,
        "out_id": out,
        "comment": "comment",
    }
    meth.advance("submit job to run GWAS analysis")
    try:
        job_ids = gwas_client.run_gwas(job_request)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    else:
        # An empty/falsy reply means no job was created.
        if not job_ids:
            raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    status_text = "GWAS analysis using emma"
    awe_job = AweJob(meth, started=status_text, running=status_text)
    # Only the first returned job id is tracked.
    awe_job.run(job_ids[0])
    return _workspace_output(out)
Beispiel #3
0
def gwas_run_kinship(meth,  filtered_variation=None, out=None, comment=None):
    """Computes the n by n kinship matrix for a set of n related subjects.
       The kinship matrix defines pairwise genetic relatedness among individuals and
       is estimated by using all genotyped markers. This requires the filtered SNPs as input.

    :param filtered_variation: Population variation, filtered
    :type filtered_variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Computed Kinship matrix
    :type out: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission call fails or yields no job id.
    meth.stages = 3

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    # NOTE(review): the ``comment`` argument is accepted but not forwarded; the
    # literal placeholder "comment" is what reaches the service. Left as-is
    # until it is confirmed the service tolerates free-form text.
    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": filtered_variation,
        "outobj_id": out,
        "comment": "comment",
    }
    # The progress text used to say "select_random_snps" (copied from another
    # method); it now names the operation that is actually submitted.
    meth.advance("submit job to calculate kinship matrix")
    try:
        jid = gc.calculate_kinship_matrix(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    # Only the first returned job id is tracked.
    AweJob(meth, started="Calculate Kinship matrix", running="Kinship matrix").run(jid[0])
    return _workspace_output(out)
Beispiel #4
0
def maf(meth, maf=0.05, variation=None, out=None, comment=None):
    """Perform filtering on Minor allele frequency (MAF).
    Minor allele frequency (MAF) refers to the frequency at which the least common
    <a href="http://en.wikipedia.org/wiki/Allele">allele</a> occurs in a given population.

    :param maf: Minor allele frequency
    :type maf: kbtypes.Numeric
    :param variation: Population variation object
    :type variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Population variation, filtered
    :type out: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: Workspace ID of filtered data
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring field list presumably is read by the hosting
    # framework, so its text is left untouched -- confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    # NOTE(review): the ``comment`` argument is never forwarded; the literal
    # placeholder "comment" is what gets sent.
    filter_request = {
        "ws_id": meth.workspace_id,
        "inobj_id": variation,
        "outobj_id": out,
        "minor_allele_frequency": maf,
        "comment": "comment",
    }
    meth.advance("submit job to filter VCF")
    try:
        job_ids = gwas_client.prepare_variation(filter_request)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    else:
        # An empty/falsy reply means no job was created.
        if not job_ids:
            raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    awe_job = AweJob(meth, started="run VCF", running="VCF")
    # Only the first returned job id is tracked.
    awe_job.run(job_ids[0])
    return _workspace_output(out)
Beispiel #5
0
def gwas_variation_to_genes(meth, workspaceID=None, gwasObjectID=None, num2snps=None, pmin=None, distance=None, gl_out=None, fs_out=None):
    """This method takes the top SNPs obtained after GWAS analysis as input
    (TopVariations) object, -log (pvalue) cutoff and a distance parameter as input.
    For each significant SNP that passes the p-value cutoff, genes are searched in the
    window specified by the distance parameter.

    :param workspaceID: Workspace (use current if empty)
    :type workspaceID: kbtypes.Unicode
    :param gwasObjectID: GWAS analysis MLM result object
    :type gwasObjectID: kbtypes.KBaseGwasData.GwasTopVariations
    :param num2snps: Number to snps
    :type num2snps: kbtypes.Numeric
    :default num2snps: 100
    :param pmin: Minimum pvalue (-log10)
    :type pmin: kbtypes.Numeric
    :default pmin: 4
    :param distance: Distance in bp around SNP to look for genes
    :type distance: kbtypes.Numeric
    :default distance: 10000
    :param gl_out: Output GwasGeneList workspace object name
    :type gl_out: kbtypes.KBaseGwasData.GwasGeneList
    :param fs_out: Output FeatureSet workspace object name
    :type fs_out: kbtypes.KBaseSearch.FeatureSet
    :return: Workspace objectID of gwas results
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the variations_to_genes call fails.
    meth.stages = 3

    # An empty workspaceID means "use meth.workspace_id". The resolved value
    # was previously computed and then ignored; it is now used for the service
    # request, the workspace client and the save, so an explicitly chosen
    # workspace is honoured. Behaviour is unchanged when workspaceID is empty.
    if not workspaceID:
        workspaceID = meth.workspace_id

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Running Variations to Genes")
    # "comment" is sent as a fixed placeholder; this method has no comment input.
    argsx = {
        "ws_id": workspaceID,
        "variation_id": gwasObjectID,
        "out_id": gl_out,
        "num2snps": num2snps,
        "pmin": pmin,
        "distance": distance,
        "comment": "comment",
    }
    try:
        # The return value is not needed: the result is read back from the
        # workspace under the name gl_out.
        gc.variations_to_genes(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    ws = Workspace2(token=meth.token, wsid=workspaceID)
    raw_data = ws.get(gl_out)

    # Element 2 of every 'genes' record is what gets passed to genelist2fs.
    gl = [gr[2] for gr in raw_data['genes']]
    fs = genelist2fs(gl)
    ws.save_objects({
        'workspace': workspaceID,
        'objects': [{
            'type': 'KBaseSearch.FeatureSet',
            'data': fs,
            'name': fs_out,
            'meta': {'original': gl_out},
        }],
    })

    meth.advance("Returning object")
    return json.dumps({'values': [
        ["Workspace GwasGeneList object", gl_out],
        ["Workspace FeatureSet object", fs_out],
    ]})
Beispiel #6
0
def gwas_run_gwas2(meth,
                   genotype=None,
                   kinship_matrix=None,
                   traits=None,
                   out=None):
    """Computes association between each SNP and a trait of interest that has been scored
    across a large number of individuals. This method takes Filtered SNP object,
    kinship matrix, trait object as input and computes association.

   :param genotype: Population variation object
   :type genotype: kbtypes.KBaseGwasData.GwasPopulationVariation
   :param kinship_matrix: Kinship matrix object id
   :type kinship_matrix: kbtypes.KBaseGwasData.GwasPopulationKinship
   :param traits: Trait object id
   :type traits: kbtypes.KBaseGwasData.GwasPopulationTrait
   :param out: Output
   :type out: kbtypes.KBaseGwasData.GwasTopVariations
   :return: New workspace object
   :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring field list presumably is read by the hosting
    # framework, so its text (odd indentation included) is untouched -- confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    # Start from the workspace id, then attach the three inputs and the output
    # name. "comment" is a fixed placeholder string.
    payload = {"ws_id": meth.workspace_id}
    payload["variation_id"] = genotype
    payload["trait_id"] = traits
    payload["kinship_id"] = kinship_matrix
    payload["out_id"] = out
    payload["comment"] = "comment"

    meth.advance("submit job to run GWAS analysis")
    try:
        submitted = service.run_gwas(payload)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    else:
        # A falsy reply means the service did not hand back a job id.
        if not submitted:
            raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    label = "GWAS analysis using emma"
    tracker = AweJob(meth, started=label, running=label)
    # Only the first returned job id is tracked.
    tracker.run(submitted[0])
    return _workspace_output(out)
Beispiel #7
0
def gene_network2ws(meth, obj_id=None, out_id=None):
    """This method displays a gene list
    along with functional annotation in a table.

    :param obj_id: Gene List workspace object identifier.
    :type obj_id: kbtypes.KBaseGwasData.GwasGeneList
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the summary above talks about displaying a table, but the
    # code below submits the gene list to genelist_to_networks and returns
    # out_id. The docstring presumably is read by the hosting framework, so
    # its text is left untouched here -- confirm and correct separately.
    # NOTE(review): stages is set to 3 while advance() is called four times.
    meth.stages = 3

    meth.advance("init GWAS service")
    gwas_client = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    # The object is always read from meth.workspace_id; there is no
    # workspace parameter on this method.
    workspace = Workspace2(token=meth.token, wsid=meth.workspace_id)
    gene_list_object = workspace.get(obj_id)

    # Element 2 of every 'genes' record is joined into one comma-separated string.
    gene_list_csv = ",".join([record[2] for record in gene_list_object['genes']])

    meth.advance("Running GeneList to Networks")
    request = {
        "ws_id": meth.workspace_id,
        "inobj_id": gene_list_csv,
        "outobj_id": out_id,
    }
    try:
        # The reply is deliberately not validated: per the original author's
        # note the service may return an empty string.
        gwas_client.genelist_to_networks(request)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    meth.advance("Returning object")
    return _workspace_output(out_id)
Beispiel #8
0
def gene_network2ws(meth, obj_id=None, out_id=None):
    """This method displays a gene list
    along with functional annotation in a table.

    :param obj_id: Gene List workspace object identifier.
    :type obj_id: kbtypes.KBaseGwasData.GwasGeneList
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the summary above describes a table display, whereas the
    # body sends the gene list to genelist_to_networks and returns out_id.
    # The docstring presumably is read by the hosting framework, so it is
    # kept verbatim here -- confirm and correct separately.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    # Always reads from meth.workspace_id; no workspace parameter is exposed.
    ws_client = Workspace2(token=meth.token, wsid=meth.workspace_id)
    stored = ws_client.get(obj_id)

    # Collect element 2 of each 'genes' record and join with commas.
    gene_ids = []
    for entry in stored['genes']:
        gene_ids.append(entry[2])
    joined_ids = ",".join(gene_ids)

    meth.advance("Running GeneList to Networks")
    call_args = {
        "ws_id": meth.workspace_id,
        "inobj_id": joined_ids,
        "outobj_id": out_id,
    }
    try:
        # No check on the reply: the original author notes it may be an
        # empty string.
        service.genelist_to_networks(call_args)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    meth.advance("Returning object")
    return _workspace_output(out_id)
Beispiel #9
0
def maf(meth, maf=0.05, variation=None, out=None, comment=None):
    """Perform filtering on Minor allele frequency (MAF).
    Minor allele frequency (MAF) refers to the frequency at which the least common
    <a href="http://en.wikipedia.org/wiki/Allele">allele</a> occurs in a given population.

    :param maf: Minor allele frequency
    :type maf: kbtypes.Numeric
    :param variation: Population variation object
    :type variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Population variation, filtered
    :type out: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: Workspace ID of filtered data
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring field list presumably is read by the hosting
    # framework, so its text is left untouched -- confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    # NOTE(review): ``comment`` is accepted but unused; the service always
    # receives the literal placeholder "comment".
    payload = {"ws_id": meth.workspace_id}
    payload["inobj_id"] = variation
    payload["outobj_id"] = out
    payload["minor_allele_frequency"] = maf
    payload["comment"] = "comment"

    meth.advance("submit job to filter VCF")
    try:
        submitted = service.prepare_variation(payload)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    else:
        # A falsy reply means the service did not hand back a job id.
        if not submitted:
            raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    tracker = AweJob(meth, started="run VCF", running="VCF")
    # Only the first returned job id is tracked.
    tracker.run(submitted[0])
    return _workspace_output(out)
Beispiel #10
0
def gwas_run_kinship(meth, filtered_variation=None, out=None, comment=None):
    """Computes the n by n kinship matrix for a set of n related subjects.
       The kinship matrix defines pairwise genetic relatedness among individuals and
       is estimated by using all genotyped markers. This requires the filtered SNPs as input.

    :param filtered_variation: Population variation, filtered
    :type filtered_variation: kbtypes.KBaseGwasData.GwasPopulationVariation
    :param out: Computed Kinship matrix
    :type out: kbtypes.KBaseGwasData.GwasPopulationKinship
    :param comment: Comment
    :type comment: kbtypes.Unicode
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the submission call fails or yields no job id.
    meth.stages = 3

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)

    # NOTE(review): the ``comment`` argument is accepted but not forwarded; the
    # literal placeholder "comment" is what reaches the service. Left as-is
    # until it is confirmed the service tolerates free-form text.
    argsx = {
        "ws_id": meth.workspace_id,
        "inobj_id": filtered_variation,
        "outobj_id": out,
        "comment": "comment"
    }
    # The progress text used to say "select_random_snps" (copied from another
    # method); it now names the operation that is actually submitted.
    meth.advance("submit job to calculate kinship matrix")
    try:
        jid = gc.calculate_kinship_matrix(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))
    if not jid:
        raise GWASException(2, "submit job failed, no job id")

    # The URL is assigned on the AweJob class itself, not on one instance.
    AweJob.URL = URLS.awe
    # Only the first returned job id is tracked.
    AweJob(meth, started="Calculate Kinship matrix",
           running="Kinship matrix").run(jid[0])
    return _workspace_output(out)
Beispiel #11
0
def gene_network2ws(meth, feature_set_id=None, out_id=None):
    """ Query all available network data in KBase central store.

    :param feature_set_id: FeatureSet workspace object id
    :type feature_set_id: kbtypes.KBaseSearch.FeatureSet
    :param out_id: Output Networks object identifier
    :type out_id: kbtypes.KBaseNetworks.Network
    :return: New workspace object
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # NOTE(review): the docstring field list presumably is read by the hosting
    # framework, so its text is left untouched -- confirm.
    meth.stages = 3

    meth.advance("init GWAS service")
    service = GWAS(URLS.gwas, token=meth.token)

    meth.advance("Retrieve genes from workspace")
    ws_client = Workspace2(token=meth.token, wsid=meth.workspace_id)
    stored = ws_client.get(feature_set_id)
    # ids2cds maps the FeatureSet element ids; only the distinct mapped values
    # are kept, joined in whatever order the set yields them.
    mapped = ids2cds(stored['elements'].keys())
    distinct_values = set(mapped.values())
    joined_ids = ",".join(distinct_values)

    meth.advance("Running GeneList to Networks")
    call_args = {"ws_id": meth.workspace_id}
    call_args["inobj_id"] = joined_ids
    call_args["outobj_id"] = out_id
    try:
        # The reply itself is not used; only a raised error is acted upon.
        service.genelist_to_networks(call_args)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    meth.advance("Returning object")
    return _workspace_output(out_id)
Beispiel #12
0
def go_enrch_net(meth, net_obj_id=None, p_value=0.05, ec=None, domain=None):
    """Identify Gene Ontology terms enriched in individual network clusters

    :param net_obj_id: Cluster object id
    :type net_obj_id: kbtypes.KBaseNetworks.Network
    :param p_value: p-value cutoff
    :type p_value: kbtypes.Unicode
    :param ec: Evidence code list (comma separated, IEA, ...)
    :type ec: kbtypes.Unicode
    :param domain: Domain list (comma separated, biological_process, ...)
    :type domain: kbtypes.Unicode
    :return: Workspace id
    :rtype: kbtypes.Unicode
    :output_widget: GeneTableWidget
    """
    #    :default p_value: 0.05
    # What the body does: loads the network object named net_obj_id, runs
    # get_go_enrichment for every cluster node, writes the hits into each
    # node's 'user_annotations', saves the result as "<net_obj_id>.cenr" and
    # returns a JSON string {'table': [header] + rows}.
    # Raises ValueError/TypeError if p_value cannot be converted to float.
    meth.stages = 3

    meth.advance("Prepare enrichment test")
    # NOTE(review): this client is never used below; the construction is kept
    # in case it has a side effect (e.g. token validation) -- confirm.
    gc = GWAS(URLS.gwas, token=meth.token)

    # ec/domain default to None; treat that like an empty string rather than
    # failing on None.replace(). Spaces are stripped before splitting.
    ec_list = (ec if ec is not None else "").replace(" ", "").split(',')
    domain_list = (domain if domain is not None else "").replace(" ", "").split(',')
    # Convert the cutoff once instead of on every enrichment record.
    p_cutoff = float(p_value)

    wsd = Workspace2(token=meth.token, wsid=meth.workspace_id)
    oc = Ontology(url=URLS.ontology)

    net_object = wsd.get_objects([{
        'workspace': meth.workspace_id,
        'name': net_obj_id
    }])
    net_data = net_object[0]['data']
    nc = Node(net_data['nodes'], net_data['edges'])

    # Keep ids that mention 'CDS' or 'locus', plus any id that does not look
    # like a cluster id ('clst' substring or 'cluster' prefix).
    gids = [
        i for i in sorted(nc.ugids.keys())
        if 'CDS' in i or 'locus' in i
        or ('clst' not in i and not i.startswith('cluster'))
    ]

    gids2cds = ids2cds(gids)

    meth.advance("Run enrichment test for each clusters")
    rows = []
    for hr_nd in net_data['nodes']:
        gid = hr_nd['entity_id']
        # Only cluster nodes are tested.
        if not (gid.startswith('cluster.') or 'clst' in gid):
            continue
        glist = nc.get_gene_list(gid)
        # Translate the cluster's member ids through the ids2cds mapping.
        cds_gl = [gids2cds[i] for i in glist]

        enr_list = oc.get_go_enrichment(cds_gl, domain_list, ec_list,
                                        'hypergeometric', 'GO')
        # Ascending p-value, so the most significant terms come first.
        enr_list = sorted(enr_list, key=itemgetter('pvalue'))

        annotations = hr_nd['user_annotations']
        go_enr_smry = ""
        go_enr_anns = ["", "", ""]
        for i, goen in enumerate(enr_list):
            pvalue = goen['pvalue']
            if pvalue > p_cutoff:
                continue
            key_prefix = 'gce.' + goen['goID']
            annotations[key_prefix + ".desc"] = goen['goDesc'][0]
            annotations[key_prefix + ".domain"] = goen['goDesc'][1]
            # repr() is the portable spelling of the old backtick syntax.
            annotations[key_prefix + ".p_value"] = repr(pvalue)
            # The first three positions of the sorted list feed the summary
            # text and the three annotation columns of the table.
            if i < 3:
                label = (goen['goID'] + "(" + "{:6.4f}".format(pvalue) + ")" +
                         goen['goDesc'][0])
                go_enr_smry += label + "\n"
                go_enr_anns[i] = label
        # NOTE(review): 'go_enrichnment_annotation' is misspelled but is stored
        # data; kept byte-for-byte so existing readers keep working.
        annotations['go_enrichnment_annotation'] = go_enr_smry
        rows.append(
            [gid, len(glist), go_enr_anns[0], go_enr_anns[1], go_enr_anns[2]])

    # NOTE(review): the meta key 'orginal' is misspelled ('original' is used
    # elsewhere); kept unchanged because it is persisted metadata.
    wsd.save_objects({
        'workspace': meth.workspace_id,
        'objects': [{
            'type': 'KBaseNetworks.Network',
            'data': net_data,
            'name': net_obj_id + ".cenr",
            'meta': {
                'orginal': net_obj_id
            }
        }]
    })

    # Largest clusters first.
    rows = sorted(rows, key=lambda x: x[1], reverse=True)

    header = [
        "Cluster ID", "# of Genes", "Annotation1", "Annotation2", "Annotation3"
    ]
    data = {'table': [header] + rows}
    return json.dumps(data)
Beispiel #13
0
def gwas_variation_to_genes(meth,
                            workspaceID=None,
                            gwasObjectID=None,
                            num2snps=None,
                            pmin=None,
                            distance=None,
                            gl_out=None,
                            fs_out=None):
    """This method takes the top SNPs obtained after GWAS analysis as input
    (TopVariations) object, -log (pvalue) cutoff and a distance parameter as input.
    For each significant SNP that passes the p-value cutoff, genes are searched in the
    window specified by the distance parameter.

    :param workspaceID: Workspace (use current if empty)
    :type workspaceID: kbtypes.Unicode
    :param gwasObjectID: GWAS analysis MLM result object
    :type gwasObjectID: kbtypes.KBaseGwasData.GwasTopVariations
    :param num2snps: Number to snps
    :type num2snps: kbtypes.Numeric
    :default num2snps: 100
    :param pmin: Minimum pvalue (-log10)
    :type pmin: kbtypes.Numeric
    :default pmin: 4
    :param distance: Distance in bp around SNP to look for genes
    :type distance: kbtypes.Numeric
    :default distance: 10000
    :param gl_out: Output GwasGeneList workspace object name
    :type gl_out: kbtypes.KBaseGwasData.GwasGeneList
    :param fs_out: Output FeatureSet workspace object name
    :type fs_out: kbtypes.KBaseSearch.FeatureSet
    :return: Workspace objectID of gwas results
    :rtype: kbtypes.Unicode
    :output_widget: ValueListWidget
    """
    # Raises GWASException when the variations_to_genes call fails.
    meth.stages = 3

    # An empty workspaceID means "use meth.workspace_id". The resolved value
    # was previously computed and then ignored; it is now used for the service
    # request, the workspace client and the save, so an explicitly chosen
    # workspace is honoured. Behaviour is unchanged when workspaceID is empty.
    if not workspaceID:
        workspaceID = meth.workspace_id

    meth.advance("init GWAS service")
    gc = GWAS(URLS.gwas, token=meth.token)
    meth.advance("Running Variations to Genes")
    # "comment" is sent as a fixed placeholder; this method has no comment input.
    argsx = {
        "ws_id": workspaceID,
        "variation_id": gwasObjectID,
        "out_id": gl_out,
        "num2snps": num2snps,
        "pmin": pmin,
        "distance": distance,
        "comment": "comment"
    }
    try:
        # The return value is not needed: the result is read back from the
        # workspace under the name gl_out.
        gc.variations_to_genes(argsx)
    except Exception as err:
        raise GWASException("submit job failed: {}".format(err))

    ws = Workspace2(token=meth.token, wsid=workspaceID)
    raw_data = ws.get(gl_out)

    # Element 2 of every 'genes' record is what gets passed to genelist2fs.
    gl = [gr[2] for gr in raw_data['genes']]
    fs = genelist2fs(gl)
    ws.save_objects({
        'workspace': workspaceID,
        'objects': [{
            'type': 'KBaseSearch.FeatureSet',
            'data': fs,
            'name': fs_out,
            'meta': {
                'original': gl_out
            }
        }]
    })

    meth.advance("Returning object")
    return json.dumps({
        'values': [["Workspace GwasGeneList object", gl_out],
                   ["Workspace FeatureSet object", fs_out]]
    })