def get_patient_samples(id):
    """Return the samples stored for one individual as a JSON response.

    ``id`` is handed unchanged to ``VcfSampleCollection.get_by_individualid``
    and whatever that call returns is serialized with ``jsonify``.
    """
    # TODO: add query string to fetch (status and filter by ids)
    collection = VcfSampleCollection()
    samples = collection.get_by_individualid(id)
    return jsonify(samples)
def beacon_query(chrom, position, allele, reference):
    """Canonical Query1.

    Forwards the four arguments, in order, to
    ``VcfSampleCollection.get_variants_count`` and returns the outcome as the
    JSON object ``{"count": <result>}``.
    """
    # TODO: validate parameters (none are checked here)
    payload = {
        "count": VcfSampleCollection().get_variants_count(
            chrom, position, allele, reference
        ),
    }
    return jsonify(payload)
def get_samples_list():
    """Return the list of patient samples as a JSON response.

    The complete result of ``VcfSampleCollection.get_all`` is serialized in a
    single response; no paging is applied.
    """
    # TODO: Validate parameters, error handling, and logging
    # TODO: Need to make this bounded (paging)
    return jsonify(VcfSampleCollection().get_all())
# Example #4
# 0
def import_vcf(file_id):
    """Import the stored VCF file ``file_id``, one document per sample.

    Reads ``<Settings.file_store>/<file_id>.vcf`` and, for every sample column
    of the file, adds ``{'fileid': file_id, 'variants': [...]}`` to
    ``VcfSampleCollection``. Each variant key has the form
    ``<chrom>_<pos>_<allele>``. A leading ``chr`` prefix is stripped from the
    chromosome name and only chromosomes 1-22, X, Y and M are kept.

    The file record is set to ``processing`` before the import and to
    ``complete`` (together with the sample count) afterwards. Errors raised
    while reading the file or storing samples are logged and not re-raised.
    """
    # Built once instead of once per allele; set membership is O(1).
    known_chroms = {str(n) for n in range(1, 23)} | {'X', 'Y', 'M'}

    VcfFileCollection().update_by_id(file_id, {'status': 'processing'})

    sample_count = 0

    try:
        path = os.path.join(Settings.file_store, file_id + '.vcf')
        # ``with`` releases the file handle even when parsing fails.
        with open(path, 'r') as stream:
            # The first record tells us how many sample columns there are.
            sample_count = len(next(vcf.Reader(stream)).samples)

            for i in range(sample_count):
                # Re-read the file from the top for each sample column.
                stream.seek(0)
                variants = []

                for record in vcf.Reader(stream):
                    # TODO - there are better ways to handle this
                    # Do we need to store the reference for this query
                    gt_bases = record.samples[i].gt_bases
                    if gt_bases is None:
                        # No genotype bases for this sample at this record:
                        # contribute nothing (previously a misspelled reset
                        # let the prior record's alleles leak in here).
                        continue

                    chrom = record.CHROM
                    # remove preceding chr if it exists
                    if re.match('chr', chrom, re.I):
                        chrom = chrom[3:].upper()
                    if chrom not in known_chroms:
                        continue

                    # Split on '\', '/' or '|'; dict.fromkeys removes
                    # duplicates while keeping a deterministic order.
                    for allele in dict.fromkeys(re.split(r'[\\/|]', gt_bases)):
                        variants.append(
                            chrom + '_' + str(record.POS) + '_' + allele)

                VcfSampleCollection().add(
                    {
                        'fileid': file_id,
                        'variants': variants
                    })
    except Exception:
        # NOTE(review): the record is still marked 'complete' below after a
        # failure - confirm whether a dedicated error status should be used.
        log.exception('error importing patient vcf')

    VcfFileCollection().update_by_id(file_id,
        {
            'status': 'complete',
            'samples': sample_count
        })
    log.info('import complete')
def upload_patient_samples(id):
    """VCF file upload operation for patient ``id``.

    Reads the ``file`` part of the current request, decodes it as UTF-8 and
    parses it with ``vcf.Reader``. For every sample column one document
    ``{'patientId': id, 'variants': [...]}`` is added to
    ``VcfSampleCollection``. Each variant key has the form
    ``<chrom>_<pos>_<allele>``. A leading ``chr`` prefix is stripped from the
    chromosome name and only chromosomes 1-22, X, Y and M are kept.

    Returns a JSON response: ``{'error': ...}`` when the file part is missing,
    has an empty filename, or the import raises; ``{'result': 'ok'}``
    otherwise. Samples stored before a failure are not rolled back.
    """
    # Built once instead of once per allele; set membership is O(1).
    known_chroms = {str(n) for n in range(1, 23)} | {'X', 'Y', 'M'}

    try:
        # check if the post request has the file part
        if 'file' not in request.files:
            return jsonify({'error': 'no file in file part'})

        log.info('request files - %s', request.files)

        upload = request.files['file']
        # If the user does not select a file, the browser also submits an
        # empty part without a filename.
        if upload.filename == '':
            log.error('patient upload file name is empty')
            flash('No selected file')
            return jsonify({'error': 'no file'})

        # TODO: 1) validate file and write header record
        #       2) save file to VCF storage path
        #       3) queue import processing
        # The client-supplied filename is only checked for emptiness above;
        # it is not used to store anything.

        # load data from the stream into memory for processing
        stream = io.StringIO(upload.read().decode('utf-8'))

        # This approach creates a document for each sample; the first record
        # tells us how many sample columns there are.
        sample_count = len(next(vcf.Reader(stream)).samples)

        for i in range(sample_count):
            # Re-read the data from the top for each sample column.
            stream.seek(0)
            variants = []

            for record in vcf.Reader(stream):
                # TODO - there are better ways to handle this
                # Do we need to store the reference for this query
                gt_bases = record.samples[i].gt_bases
                if gt_bases is None:
                    # No genotype bases for this sample at this record.
                    continue
                log.info(gt_bases)

                chrom = record.CHROM
                # remove preceding chr if it exists; upper-case the rest so
                # 'chrx' is accepted, matching import_vcf
                if re.match('chr', chrom, re.I):
                    chrom = chrom[3:].upper()
                if chrom not in known_chroms:
                    continue

                # Split on '\', '/' or '|'; dict.fromkeys removes duplicates
                # while keeping a deterministic order.
                for allele in dict.fromkeys(re.split(r'[\\/|]', gt_bases)):
                    variants.append(
                        chrom + '_' + str(record.POS) + '_' + allele)

            # insert samples into the database
            VcfSampleCollection().add({'patientId': id, 'variants': variants})
    except Exception:
        log.exception('error importing patient vcf')
        # Report the failure instead of claiming success, using the same
        # ``{'error': ...}`` shape as the validation failures above.
        return jsonify({'error': 'import failed'})

    # TODO: change this to return import stats
    return jsonify({'result': 'ok'})
def delete_sample(id):
    """Delete a VCF sample and acknowledge with a JSON response.

    ``id`` is passed to ``VcfSampleCollection.delete``; that call's return
    value is ignored and ``{'result': 'ok'}`` is sent back whenever it does
    not raise.
    """
    collection = VcfSampleCollection()
    collection.delete(id)

    acknowledgement = {'result': 'ok'}
    return jsonify(acknowledgement)