Esempio n. 1
0
 def setUp(self):
     """
     Create an HttpClient with mocked request helpers and populate the
     placeholder attribute values defined on this fixture.
     """
     # Client whose two request runners are replaced by mock.Mock().
     self.httpClient = client.HttpClient("http://example.com")
     self.httpClient._run_search_request = mock.Mock()
     self.httpClient._run_get_request = mock.Mock()
     self.pageSize = 1000
     self.httpClient.set_page_size(self.pageSize)

     # Generic object identifiers.
     self.objectId = "SomeId"
     self.objectName = "objectName"
     self.parentId = "parentId"

     # Datasets, variants and call sets.
     self.datasetId = "datasetId"
     self.variantSetId = "variantSetId"
     self.variantAnnotationSetId = "variantAnnotationSetId"
     self.callSetIds = ["id1", "id2"]

     # Features.
     self.featureSetId = "featureSetId"
     self.feature = "feature"
     self.geneSymbol = "geneSymbol"

     # References and reads.
     self.referenceSetId = "referenceSetId"
     self.referenceId = "referenceId"
     self.referenceName = "referenceName"
     self.readGroupIds = ["readGroupId"]
     self.assemblyId = "assemblyId"
     self.accession = "accession"
     self.md5checksum = "md5checksum"

     # Biosamples and individuals.
     self.bioSampleId = "bioSampleId"
     self.bioSampleName = "bioSampleName"
     self.individualId = "individualId"
     self.individualName = "individualName"

     # Coordinate range.
     self.start = 100
     self.end = 101

     # RNA quantification.
     self.rnaQuantificationSetId = "rnaQuantificationSetId"
     self.rnaQuantificationId = "rnaQuantificationId"
     self.expressionLevelId = "expressionLevelId"
     self.threshold = 0.0
Esempio n. 2
0
 def setUp(self):
     """
     Create an HttpClient with mocked request helpers and populate the
     placeholder attribute values defined on this fixture.
     """
     # Client whose two request runners are replaced by mock.Mock().
     self.httpClient = client.HttpClient("http://example.com")
     self.httpClient._runSearchRequest = mock.Mock()
     self.httpClient._runGetRequest = mock.Mock()
     self.pageSize = 1000
     self.httpClient.setPageSize(self.pageSize)

     # Generic object identifiers.
     self.objectId = "SomeId"
     self.objectName = "objectName"

     # Datasets, variants and call sets.
     self.datasetId = "datasetId"
     self.variantSetId = "variantSetId"
     self.variantAnnotationSetId = "variantAnnotationSetId"
     self.callSetIds = ["id1", "id2"]

     # References and reads.
     self.referenceSetId = "referenceSetId"
     self.referenceId = "referenceId"
     self.referenceName = "referenceName"
     self.readGroupIds = ["readGroupId"]
     self.assemblyId = "assemblyId"
     self.accession = "accession"
     self.md5checksum = "md5checksum"

     # Biosamples and individuals.
     self.bioSampleId = "bioSampleId"
     self.bioSampleName = "bioSampleName"
     self.individualId = "individualId"
     self.individualName = "individualName"

     # Coordinate range.
     self.start = 100
     self.end = 101
Esempio n. 3
0
def setup_ga4gh_client():
    """
    Return the client library to use for searching the data on the GA4GH
    server specified in the config file.

    This function reads 'beacon2ga4gh.cfg' and then initializes the ga4gh
    client library. The module globals ``serverURL`` and ``debugLevel``
    are updated as a side effect.

    If the 'refServer' section is missing, the server falls back to
    "http://1kgenomes.ga4gh.org" and the log level falls back to any
    existing module-level ``debugLevel`` (or logging.WARNING when none is
    defined), so that the fallback path cannot fail on an unset name.
    """
    global serverURL
    global debugLevel
    global organization

    config = ConfigParser.ConfigParser()
    sectionMissing = False
    try:
        config.read('beacon2ga4gh.cfg')
        serverURL = config.get('refServer', 'url')
        debugLevel = config.get('refServer', 'debugLevel')
        # organization = config.get('refServer', 'organization')
    except ConfigParser.NoSectionError:
        sectionMissing = True
        serverURL = "http://1kgenomes.ga4gh.org"
        # Keep a module-level default if one exists; otherwise use the
        # root logger's normal default level.
        debugLevel = globals().get('debugLevel', logging.WARNING)
    # Configure logging before emitting any record: a logging call made
    # while the root logger has no handlers installs a default handler,
    # after which basicConfig(level=...) would be silently ignored.
    logging.basicConfig(level=debugLevel)
    if sectionMissing:
        logging.error(
            "section not found in beacon2ga4gh.cfg file, using 1kgenomes.ga4gh.org."
        )
    logging.debug('using {}'.format(serverURL))
    cl = client.HttpClient(serverURL)
    return cl
Esempio n. 4
0
def makeHttpClient():
    """
    Return an HttpClient built from fixed placeholder arguments.
    """
    # Positional arguments, in order: url, debugLevel, workarounds, key.
    return client.HttpClient("http://example.com", 0, set(), "KEY")
Esempio n. 5
0
def ga2vcf_run(args):
    """
    Build a variant search request and an HTTP client from ``args``,
    then run the VCF converter.
    """
    variantsRequest = RequestFactory(args).createSearchVariantsRequest()
    activeWorkarounds = getWorkarounds(args)
    vcfClient = client.HttpClient(
        args.baseUrl, args.verbose, activeWorkarounds, args.key)
    # Hand everything to the converter and run it.
    converters.VcfConverter(
        vcfClient, variantsRequest, args.outputFile,
        args.binaryOutput).convert()
Esempio n. 6
0
def runDemo():
    """
    Search one variant set on a local server and print each variant
    found as a tab-separated line.
    """
    demoClient = client.HttpClient("http://localhost:8000")
    query = {
        "variantSetId": "MWtnLXAzLXN1YnNldDptdm5jYWxs",
        "referenceName": "1",
        "start": 45000,
        "end": 50000,
    }
    for variant in demoClient.searchVariants(**query):
        columns = (
            variant.referenceName, variant.start, variant.end,
            variant.referenceBases, variant.alternateBases)
        print(*columns, sep="\t")
Esempio n. 7
0
def ga2sam_run(args):
    """
    Build a read search request and an HTTP client from ``args``, then
    run the SAM converter.
    """
    # Request and client set-up.
    readsRequest = RequestFactory(args).createSearchReadsRequest()
    activeWorkarounds = getWorkarounds(args)
    samClient = client.HttpClient(
        args.baseUrl, args.verbose, activeWorkarounds, args.key)

    # Hand everything to the converter and run it.
    converters.SamConverter(
        samClient, readsRequest, args.outputFile,
        args.binaryOutput).convert()
Esempio n. 8
0
def ga2vcf_run(args):
    """
    Build a variant search request and an HTTP client from ``args``,
    then run the VCF converter.
    """
    # RequestFactory reads ``variantSetIds``, while the parsed arguments
    # only carry ``variantSetId``; copy the value across first.
    args.variantSetIds = args.variantSetId
    variantsRequest = RequestFactory(args).createSearchVariantsRequest()
    activeWorkarounds = getWorkarounds(args)
    vcfClient = client.HttpClient(
        args.baseUrl, args.verbose, activeWorkarounds, args.key)
    # Hand everything to the converter and run it.
    converters.VcfConverter(
        vcfClient, variantsRequest, args.outputFile,
        args.binaryOutput).convert()
Esempio n. 9
0
def _ga4gh_queries():
    """
    Query the GA4GH server and return the variants found.

    Each result is a tuple of (reference_name, start, end,
    reference_bases, alternate_bases); every tuple is also printed.
    """
    # Only pass an explicit log level when DEBUG is on.
    clientOptions = {'logLevel': logging.DEBUG} if DEBUG else {}
    ga4ghClient = g4client.HttpClient(API_SERVER_GA4GH, **clientOptions)

    datasets = list(ga4ghClient.search_datasets())
    # The variant sets of the first dataset are fetched but not used
    # further below.
    variant_sets = list(
        ga4ghClient.search_variant_sets(dataset_id=datasets[0].id))

    # this file is mapped to hg37
    # https://api.23andme.com/res/txt/snps.b4e00fe1db50.data
    # means use the brca-hg37 reference set
    # NOTE(review): the query below asks for 'brca-hg38', which does not
    # match the hg37 remark above -- confirm which one is intended.
    found = ga4ghClient.search_variants(
        variant_set_id='brca-hg38',
        reference_name="13",
        start=32315650,
        end=32315660)

    results = []
    for variant in found:
        row = (
            variant.reference_name,
            variant.start,
            variant.end,
            variant.reference_bases,
            variant.alternate_bases,
        )
        print(row)
        results.append(row)
    # Going to need to get variants on 17 as well
    return results
Esempio n. 10
0
def runDemo():
    """
    Send a SearchVariantsRequest to the server and print each variant
    returned as a tab-separated line.
    """
    demoClient = client.HttpClient("http://server:80/current")

    # Fill in the search request field by field.
    request = protocol.SearchVariantsRequest()
    requestFields = (
        ("variantSetIds", ["1kg-phase1"]),
        ("referenceName", "2"),
        ("start", 33100),
        ("end", 34000),
    )
    for fieldName, fieldValue in requestFields:
        setattr(request, fieldName, fieldValue)

    for variant in demoClient.searchVariants(request):
        columns = (
            variant.referenceName, variant.start, variant.end,
            variant.referenceBases, variant.alternateBases)
        print(*columns, sep="\t")
Esempio n. 11
0
def main():
    """
    Fetch variants from GA4GH and from ExAC and print the pairs whose
    positions line up.
    """
    # Connect to the GA4GH server.
    ga4gh_client = client.HttpClient(GA4GH_BASE_URL)

    # Take the first variant set of the first dataset returned and
    # collect its variants on reference "1" in the 0..1000000 range.
    first_dataset = ga4gh_client.searchDatasets().next()
    first_variant_set = ga4gh_client.searchVariantSets(
        first_dataset.id).next()
    ga4gh_variants = list(ga4gh_client.searchVariants(
        first_variant_set.id,
        start=0,
        end=1000000,
        referenceName="1"))

    print(str(len(ga4gh_variants)) + " GA4GH variants.")

    # Ask the ExAC API for the variants in the OR4F5 gene.
    # See `hello_exac.py`
    GENE_NAME = "OR4F5"
    exac_url = (
        EXAC_BASE_URL + "awesome?query=" + GENE_NAME +
        "&service=variants_in_gene")
    OR4F5_variants = requests.get(exac_url).json()

    print(str(len(OR4F5_variants)) + " ExAC variants.")

    # Pair up variants that share a position.
    matches = []
    for OR4F5_variant in OR4F5_variants:
        for ga4gh_variant in ga4gh_variants:
            # GA4GH positions are 0-based, so shift by one to compare
            # against the ExAC position.
            if (ga4gh_variant.start + 1) != OR4F5_variant['pos']:
                continue
            print(OR4F5_variant['pos'])
            print(ga4gh_variant.start)
            matches.append((ga4gh_variant, OR4F5_variant))

    print("Found " + str(len(matches)) + " matches!")

    # Show both sides of every matched pair.
    for ga4gh_side, exac_side in matches:
        print(ga4gh_side.names)
        print(exac_side['rsid'])
        print(ga4gh_side.referenceBases, exac_side['ref'])
        print(ga4gh_side.alternateBases, exac_side['alt'])
Esempio n. 12
0
 def __init__(self, args):
     """
     Store the key from ``args`` and create the client: a LocalClient
     when ``args.baseUrl`` starts with "file://", an HttpClient
     otherwise.
     """
     self._key = args.key
     # TODO this is an experimental addition which is useful for
     # testing. We should think about this and document it if we
     # think it's a useful feature. There is an argument for pushing
     # the backend instantiation into the client, and letting the
     # client be a factory, instantiating the correct Client class
     # depending on the prefix.
     filePrefix = "file://"
     if not args.baseUrl.startswith(filePrefix):
         self._client = client.HttpClient(
             args.baseUrl, verbosityToLogLevel(args.verbose), self._key)
     else:
         # Everything after the prefix is the data directory.
         dataDir = args.baseUrl[len(filePrefix):]
         self._client = client.LocalClient(
             backend.FileSystemBackend(dataDir))
Esempio n. 13
0
File: cli.py Progetto: afirth/server
def ga2vcf_run(args):
    """
    Convert the variants selected by ``args`` to VCF.

    Output goes to ``args.outputFile`` when it is set, otherwise to
    sys.stdout. A file opened here is always closed again, even if
    building the client or running the conversion raises; sys.stdout is
    never closed.
    """
    # instantiate params
    searchVariantsRequest = RequestFactory(
        args).createSearchVariantsRequest()
    searchVariantSetsRequest = RequestFactory(
        args).createSearchVariantSetsRequest()
    if args.outputFile is None:
        outputStream = sys.stdout
    else:
        outputStream = open(args.outputFile, 'w')
    try:
        workarounds = getWorkarounds(args)
        httpClient = client.HttpClient(
            args.baseUrl, args.verbose, workarounds, args.key)

        # do conversion
        vcfConverter = converters.VcfConverter(
            httpClient, outputStream,
            searchVariantSetsRequest, searchVariantsRequest)
        vcfConverter.convert()
    finally:
        # cleanup: only close what this function opened
        if args.outputFile is not None:
            outputStream.close()
Esempio n. 14
0
# coding: utf-8

# In[1]:

import ga4gh.client as client

# Client for the GA4GH server at 1kgenomes.ga4gh.org.
c = client.HttpClient("http://1kgenomes.ga4gh.org")

import collections

# In[2]:

# Take the first dataset yielded by the dataset search.
dataset = c.search_datasets().next()

# Keep the variant set named "functional-annotation". The name
# functionalAnnotation is only bound when such a set exists; if several
# sets carry that name, the last one wins.
for functionalVariantSet in c.search_variant_sets(dataset.id):
    if functionalVariantSet.name == "functional-annotation":
        functionalAnnotation = functionalVariantSet

# First variant annotation set returned for that variant set.
functionalAnnotationSet = c.search_variant_annotation_sets(
    variant_set_id=functionalAnnotation.id).next()

# In[3]:

geneList = []

# Ordered record describing one region by name, start, end and
# chromosome (stored under the key 'chrome').
# NOTE(review): presumably the BRCA1 locus on chromosome 17 -- confirm
# the coordinates against the reference build the server uses.
geneAndTermDict = collections.OrderedDict()

geneAndTermDict['name'] = 'BRCA1'
geneAndTermDict['start'] = 43044295
geneAndTermDict['end'] = 43170245
geneAndTermDict['chrome'] = '17'
Esempio n. 15
0
import ga4gh.protocol as protocol
import ga4gh.client as client

import auth

application = app = Flask(__name__)
# The session secret is read from a local file; create it with:
#   head -c 24 /dev/urandom > secret_key
# Context managers make sure both files are closed once read.
with open('secret_key', 'rb') as _secret_key_file:
    app.config['SECRET_KEY'] = _secret_key_file.read()

# Configuration loaded from op_config.json.
with open('op_config.json') as _op_config_file:
    __OP = json.loads(_op_config_file.read())

VARIANTS_URL = sys.argv[1] if len(
    sys.argv) > 1 else "http://*****:*****@app.route('/')
@auth.require_session(redirect_handler='login')
def index():
    """
    Render 'index_rp.html' with the module-level variant sets and the
    id stored in the current session.
    """
    context = {
        'variant_sets': variant_sets,
        'user': session['id'],
    }
    return render_template('index_rp.html', **context)


@app.route('/query', methods=['POST'])
Esempio n. 16
0
def main():
    """
    Plot log-scaled histograms of the reference and alternate base
    lengths of the variants in one variant set.
    """
    # Connect using BASE_URL.
    ga4gh_client = client.HttpClient(BASE_URL)

    # `.next()` takes the first item from each search iterator.
    first_dataset = ga4gh_client.searchDatasets().next()
    first_variant_set = ga4gh_client.searchVariantSets(
        first_dataset.id).next()

    # Collect every variant of that set on reference "1".
    variant_list = list(ga4gh_client.searchVariants(
        first_variant_set.id,   # The ID of the variantSet
        start=0,                # Start position
        end=2**32,              # End position
        referenceName="1"))     # chrom

    # One length per variant for the reference bases, and one length
    # per alternate allele for the alternate bases.
    ref_lengths = [len(variant.referenceBases) for variant in variant_list]
    alt_lengths = [
        len(alternate)
        for variant in variant_list
        for alternate in variant.alternateBases]

    print(str(len(variant_list)) + " variants.")

    def plot_lengths(figure_number, lengths, colour, title, x_label):
        # Draw one log-scaled histogram of `lengths` in its own figure.
        # see more examples http://matplotlib.org/1.2.1/examples/pylab_examples/histogram_demo.html
        plt.figure(figure_number)
        bin_edges = list(range(1, np.max(lengths) + 1))
        counts, _edges, _patches = plt.hist(
            lengths, bins=bin_edges, facecolor=colour, alpha=0.75, log=True)
        plt.title(title)
        plt.xlabel(x_label)
        plt.ylabel('n variants of length (log)')
        plt.axis([0, len(counts), 0, np.max(counts)])

    plot_lengths(
        1, ref_lengths, 'red',
        "Frequency of reference base lengths", 'Length of reference')
    plot_lengths(
        2, alt_lengths, 'blue',
        "Frequency of alternate base lengths", 'Length of alts')

    plt.show()
Esempio n. 17
0
File: cli.py Progetto: afirth/server
 def __init__(self, args):
     """
     Run the parent initializer, record the requested id and create
     the HTTP client.
     """
     super(AbstractGetRunner, self).__init__(args)
     self._id = args.id
     # self._workarounds and self._key are not assigned here; they are
     # presumably set by the parent initializer -- confirm.
     clientArgs = (
         args.baseUrl, args.verbose, self._workarounds, self._key)
     self._httpClient = client.HttpClient(*clientArgs)
Esempio n. 18
0
File: cli.py Progetto: afirth/server
 def __init__(self, args):
     """
     Record the workarounds, key and verbosity taken from ``args`` and
     create the HTTP client from them.
     """
     self._workarounds = getWorkarounds(args)
     self._key = args.key
     self._verbosity = args.verbose
     clientArgs = (
         args.baseUrl, args.verbose, self._workarounds, self._key)
     self._httpClient = client.HttpClient(*clientArgs)
Esempio n. 19
0
 def __init__(self, args):
     """
     Record the workarounds, key and verbosity taken from ``args`` and
     create the HTTP client from them.
     """
     # ``args.workarounds`` is a comma-separated string; keep the
     # distinct entries.
     workaroundNames = args.workarounds.split(',')
     self._workarounds = set(workaroundNames)
     self._key = args.key
     self._verbosity = args.verbose
     clientArgs = (
         args.baseUrl, args.verbose, self._workarounds, self._key)
     self._httpClient = client.HttpClient(*clientArgs)
Esempio n. 20
0
def gene_route(gene_name):
    """
    Return, as a JSON response, the ExAC variants of ``gene_name`` that
    share a position with a GA4GH variant.

    The response body has the keys "gene_name" and "matches"; each match
    is a pair of (GA4GH variant as a JSON dict, ExAC variant). When ExAC
    returns no variants the GA4GH server is not queried and "matches" is
    empty.
    """
    # First, let's request variants in the gene from ExAC.

    # Note that we aren't handling cases when the gene isn't found. The ExAC
    # API uses redirects to locate the gene of interest. Better error handling
    # is left as an exercise.

    print("Looking for " + str(gene_name))

    print("If this hangs forever ctrl-c to quit :)")

    # gene_name is caller-supplied (see the example URL further down),
    # so let requests percent-encode it instead of splicing it into the
    # query string, where characters such as "&" or "#" would alter the
    # request. A list of pairs keeps the parameter order stable.
    response = requests.get(
        EXAC_BASE_URL + "awesome",
        params=[("query", gene_name), ("service", "variants_in_gene")])

    exac_variants = response.json()

    # Now we'll check to make sure we got something back.

    print("Found " + str(len(exac_variants)) + " variants in " + gene_name)

    if not exac_variants:
        # Nothing to line up. Without this guard the seed values below
        # (start 2**32, end 0) would be sent to GA4GH as the search range.
        print("Found 0 matches.")
        return flask.jsonify({"gene_name": gene_name, "matches": []})

    # As in `combine_apis` we'll get all the variants from the GA4GH
    # variant set.

    # We can refine our search by getting the range of positions
    # from the ExAC variants.

    min_start = 2**32
    max_start = 0
    chrom = "1"

    for variant in exac_variants:
        if variant['pos'] > max_start:
            max_start = variant['pos']
        if variant['pos'] < min_start:
            min_start = variant['pos']
        # The chromosome of the last variant seen is the one used.
        chrom = variant['chrom']

    print("Range: " + str(min_start) + ":" + str(max_start) + " on chrom " +
          chrom)

    c = client.HttpClient(GA4GH_BASE_URL)

    # First variant set of the first dataset, restricted to the range
    # covered by the ExAC variants.
    ga4gh_variants = [
        v for v in c.searchVariants(c.searchVariantSets(
            c.searchDatasets().next().id).next().id,
                                    start=min_start,
                                    end=max_start,
                                    referenceName=chrom)
    ]

    # We'll find if there are any matches and return them.
    # Matches is a list of tuples, the first of each tuple
    # being the GA4GH variant, and the second being the ExAC
    # variant.

    matches = []

    for exac_variant in exac_variants:
        for ga4gh_variant in ga4gh_variants:
            # Note that GA4GH positions are 0-based so we add
            # 1 to line it up with ExAC.
            if (ga4gh_variant.start + 1) == exac_variant['pos']:
                matches.append((ga4gh_variant.toJsonDict(), exac_variant))

    print("Found " + str(len(matches)) + " matches.")

    # You can point a web browser at this address to see some results:
    # http://localhost:5000/gene/or4f5

    # Now that we have a web service synthesizing the results
    # from ExAC and GA4GH, you may use this web service in the same
    # way we used ExAC or GA4GH in the hello_ examples.

    # response = requests.get("http://localhost:5000/gene/or4f5")
    # response_data = response.json()
    # for result in response_data['matches']:
    #   print result

    return flask.jsonify({"gene_name": gene_name, "matches": matches})
Esempio n. 21
0
def main():
    """
    Walk datasets, variant sets and variants through the client and
    print a count of variants per reference base length.
    """
    # Connect using BASE_URL.
    ga4gh_client = client.HttpClient(BASE_URL)

    # If you are using an IDE with autocompletion (like PyCharm)
    # you should be able to access the named functions by
    # placing a `.` after the client variable in your editor.

    # The search returns a generator, so it is printed as such and then
    # iterated to obtain the datasets themselves.
    dataset_search = ga4gh_client.searchDatasets()
    print(dataset_search)

    datasets = []
    for found_dataset in dataset_search:
        datasets.append(found_dataset)
        print(found_dataset)

    # Gather the variant sets of every dataset. Results are classed
    # objects, so attributes are read with dot-notation.
    variant_sets = []
    for found_dataset in datasets:
        variant_sets.extend(
            ga4gh_client.searchVariantSets(found_dataset.id))

    # Analyse the first variant set only.
    chosen_set = variant_sets[0]
    variants = ga4gh_client.searchVariants(
        chosen_set.id, 100000, 900000, "1")

    # The client manages paging for us, so there may be
    # a large number of results generated by a search.
    variant_list = list(variants)

    print(str(len(variant_list)) + " variants.")

    # Count how many variants have each reference base length, as in
    # the previous examples.
    reference_base_counts = {}
    for variant in variant_list:
        base_length = len(variant.referenceBases)
        reference_base_counts[base_length] = (
            reference_base_counts.get(base_length, 0) + 1)

    # Did we get the same results as in `hello_ga4gh.py`?
    print(reference_base_counts)
Esempio n. 22
0
 def __init__(self, args):
     """
     Run the parent initializer, record the requested id and create
     the HTTP client.
     """
     super(AbstractGetRunner, self).__init__(args)
     self._id = args.id
     # self._key is not assigned here; it is presumably set by the
     # parent initializer -- confirm.
     clientArgs = (
         args.baseUrl, verbosityToLogLevel(args.verbose), self._key)
     self._httpClient = client.HttpClient(*clientArgs)
Esempio n. 23
0
 def __init__(self, args):
     """
     Record the key taken from ``args`` and create the HTTP client.
     """
     self._key = args.key
     clientArgs = (
         args.baseUrl, verbosityToLogLevel(args.verbose), self._key)
     self._httpClient = client.HttpClient(*clientArgs)