Ejemplo n.º 1
0
def load_consequence(consequence=None):
    """Insert the chromosome, gene and transcript of one consequence record.

    Three JSON payloads are posted, in this order, to the
    ``rest/INSERT/chromosome``, ``rest/INSERT/gene`` and
    ``rest/INSERT/transcript`` endpoints below the module-level ``uri``,
    authenticated with the module-level ``auth``.

    Args:
        consequence: mapping whose ``Chr``, ``SYMBOL``, ``Gene``,
            ``Feature_ID`` and ``RefSeq_ID`` entries each expose a
            ``'value'`` item.

    Returns:
        True when the final (transcript) request answers with HTTP 200.
        False otherwise, including when no consequence is given or when the
        id of the inserted gene cannot be read from the gene response.
    """
    if consequence is None:
        # The default argument used to fail on the first subscript below.
        return False

    headers = {'Content-Type': 'application/json'}

    def post_json(path, payload):
        # All three inserts share headers, credentials and JSON encoding.
        return requests.post(uri + path,
                             headers=headers,
                             auth=auth,
                             data=json.dumps(payload))

    # Data definitions
    chrom = get_chromosome_number(consequence['Chr']['value'])
    chrom_name = chromosome_number2str(chrom)
    hgnc = consequence['SYMBOL']['value']
    ensembl_id = consequence['Gene']['value']
    transcript_ensembl_id = consequence['Feature_ID']['value']
    hgmd_name = consequence['RefSeq_ID']['value']

    # chromosome (the outcome of this insert is not inspected)
    post_json('rest/INSERT/chromosome/X.json',
              {"id": chrom, "name": chrom_name})

    # gene
    resp = post_json('rest/INSERT/gene/X.json',
                     {"chromosome_id": chrom,
                      "HGNC": hgnc,
                      "ensembl_id": ensembl_id})

    # A requests Response has no ``id`` attribute, so ``resp.id`` raised
    # AttributeError. Use the attribute if the object offers one, otherwise
    # read the id from the JSON body.
    # NOTE(review): assumes the endpoint returns a JSON object carrying the
    # new row id under "id" -- confirm against the REST service.
    gene_id = getattr(resp, 'id', None)
    if gene_id is None:
        try:
            body = resp.json()
        except ValueError:
            body = None
        if isinstance(body, dict):
            gene_id = body.get('id')
    if gene_id is None:
        # Without a gene id the transcript cannot be linked to its gene.
        return False

    # transcript
    resp = post_json('rest/INSERT/transcript/X.json',
                     {"genes_id": gene_id,
                      "HGMD_name": hgmd_name,
                      "ensembl_id": transcript_ensembl_id})
    return resp.status_code == 200
Ejemplo n.º 2
0
def load_consequence(consequence=None):
    """Post one consequence record as chromosome, gene and transcript rows.

    The payloads go, in that order, to the ``rest/INSERT/...`` endpoints under
    the module-level ``uri`` using the module-level ``auth`` credentials.

    Args:
        consequence: mapping with ``Chr``, ``SYMBOL``, ``Gene``, ``Feature_ID`` and ``RefSeq_ID`` entries,
            each of which holds a ``"value"`` item.

    Returns:
        True if the transcript request (the last one) returns HTTP 200; False otherwise, which also covers a
        missing ``consequence`` and a gene response from which no id can be read.
    """
    if consequence is None:
        # Calling with the default used to crash on the first lookup below.
        return False

    headers = {"Content-Type": "application/json"}
    # Data definitions
    chrom = get_chromosome_number(consequence["Chr"]["value"])
    chrom_name = chromosome_number2str(chrom)
    hgnc = consequence["SYMBOL"]["value"]
    ensembl_id = consequence["Gene"]["value"]
    transcript_ensembl_id = consequence["Feature_ID"]["value"]
    hgmd_name = consequence["RefSeq_ID"]["value"]

    # chromosome (response deliberately left unchecked, as before)
    payload = {"id": chrom, "name": chrom_name}
    requests.post(uri + "rest/INSERT/chromosome/X.json", headers=headers, auth=auth, data=json.dumps(payload))

    # gene
    payload = {"chromosome_id": chrom, "HGNC": hgnc, "ensembl_id": ensembl_id}
    resp = requests.post(uri + "rest/INSERT/gene/X.json", headers=headers, auth=auth, data=json.dumps(payload))

    # ``resp.id`` does not exist on a requests Response and raised AttributeError here. Prefer the attribute when
    # the response object provides it, else fall back to the decoded JSON body.
    # NOTE(review): the "id" key of the JSON body is an assumption -- verify against the REST service.
    gene_id = getattr(resp, "id", None)
    if gene_id is None:
        try:
            decoded = resp.json()
        except ValueError:
            decoded = None
        gene_id = decoded.get("id") if isinstance(decoded, dict) else None
    if gene_id is None:
        # No gene id means the transcript row could not reference its gene.
        return False

    # transcript
    payload = {"genes_id": gene_id, "HGMD_name": hgmd_name, "ensembl_id": transcript_ensembl_id}
    resp = requests.post(uri + "rest/INSERT/transcript/X.json", headers=headers, auth=auth, data=json.dumps(payload))
    return resp.status_code == 200
Ejemplo n.º 3
0
def main():
    """Merge the VCF files in ``options.input_vcf`` into ``options.output_vcf``.

    Steps:
      1. bgzip-compress and tabix-index every input file.
      2. Use the first input as header template, adding the FREQ format and
         the SFREQ / SDP info definitions plus any definition found in the
         other inputs.
      3. Collect every (chromosome number, position) seen in any input.
      4. For each site, fetch the records of all inputs, merge those that
         compare equal and whose ``merge`` call succeeds, and write the
         result to the merged file and to the ``_snps.vcf`` or
         ``_indels.vcf`` companion file.

    Reads the module-level ``options``; returns nothing.
    """
    global options, args

    # Be sure to get files bgzipped and tabix indexed
    for vcf_file in options.input_vcf:
        _bgzip_and_index(vcf_file)

    open_handles = []  # everything opened below is closed in ``finally``
    try:
        # First vcf file will be the template file for outputting parameters
        template_vcf = vcf.Reader(
            _open_tracked(options.input_vcf[0], 'r', open_handles))

        # Add essential fields in both formats and infos (header information)
        template_vcf.formats['FREQ'] = VcfFormat('FREQ', 1, 'String',
                                                 'Variant allele frequency')
        template_vcf.infos['SFREQ'] = VcfInfo(
            'SFREQ', 1, 'Float',
            'Maximum variant allele frequency of all samples')
        template_vcf.infos['SDP'] = VcfInfo(
            'SDP', 1, 'Integer', 'Maximum sequencing depth of all samples')

        # Create a list of sorted variant-sites containing chr and position
        variant_sites = _collect_variant_sites(options.input_vcf)

        # Open all files for random access
        input_vcf = []
        for index, vcf_file in enumerate(options.input_vcf):
            reader = vcf.Reader(
                _open_tracked(vcf_file + '.gz', 'r', open_handles))
            input_vcf.append(reader)
            # Perform tests and checks
            if index > 0 and reader.samples != template_vcf.samples:
                print("INFO: not same sample list in %s" % vcf_file)

            # Add necessary FORMAT or INFO fields definitions in template
            for info in reader.infos:
                if info not in template_vcf.infos:
                    template_vcf.infos[info] = reader.infos[info]
            for myformat in reader.formats:
                if myformat not in template_vcf.formats:
                    template_vcf.formats[myformat] = reader.formats[myformat]

        # Open output handles
        output_vcf = vcf.Writer(
            _open_tracked(options.output_vcf, 'w', open_handles),
            template_vcf,
            lineterminator='\n')
        output_indels_vcf = vcf.Writer(
            _open_tracked(options.output_vcf + '_indels.vcf', 'w',
                          open_handles),
            template_vcf,
            lineterminator='\n')
        output_snps_vcf = vcf.Writer(
            _open_tracked(options.output_vcf + '_snps.vcf', 'w',
                          open_handles),
            template_vcf,
            lineterminator='\n')

        # Now parse each variant-site and fetch information from vcfs:
        for chrom_number, position in variant_sites:
            records = []
            for my_vcf in input_vcf:
                try:
                    for record in my_vcf.fetch(
                            chromosome_number2str(chrom_number), position,
                            position):
                        # vcf.fetch returns also next position if described,
                        # must be therefore removed
                        if record.POS == position:
                            records.append(record)
                except KeyError:
                    # Raised when the primary key is not found in one of the
                    # files. No actions required
                    pass
            if not records:
                # Nothing was fetched for this site; ``records[0]`` used to
                # raise IndexError here.
                continue

            for master_record in _merge_site_records(records):
                output_vcf.write_record(master_record)
                if master_record.is_snp:
                    output_snps_vcf.write_record(master_record)
                elif master_record.is_indel:
                    output_indels_vcf.write_record(master_record)
    finally:
        for handle in reversed(open_handles):
            handle.close()


def _open_tracked(path, mode, handles):
    """Open ``path`` and register the handle in ``handles`` for later closing."""
    handle = open(path, mode)
    handles.append(handle)
    return handle


def _bgzip_and_index(vcf_file):
    """Write ``vcf_file + '.gz'`` with bgzip, index it with tabix, print tool output."""
    compressed = vcf_file + '.gz'
    # Argument lists instead of a shell string: file names containing spaces
    # or shell metacharacters are passed through untouched.
    bgzip_cmd = ['bgzip', '-c', vcf_file]
    with open(compressed, 'wb') as gz_out:
        process = subprocess.Popen(bgzip_cmd,
                                   stdout=gz_out,
                                   stderr=subprocess.PIPE)
        messages = process.communicate()[1]
    if process.returncode:
        raise subprocess.CalledProcessError(process.returncode,
                                            bgzip_cmd,
                                            output=messages)
    print(messages)
    print(subprocess.check_output(['tabix', '-f', '-p', 'vcf', compressed],
                                  stderr=subprocess.STDOUT))


def _collect_variant_sites(vcf_paths):
    """Return the sorted, unique (chromosome number, position) pairs of all records."""
    # A set makes the duplicate check O(1) instead of scanning a list.
    sites = set()
    for path in vcf_paths:
        with open(path, 'r') as handle:
            for record in vcf.Reader(handle):
                sites.add((get_chromosome_number(record.CHROM), record.POS))
    return sorted(sites)


def _merge_site_records(records):
    """Group the (non-empty) records of one site into master records.

    A record is folded into every master it compares equal to and whose
    ``merge`` call returns a true value; a record merged nowhere becomes a
    new master.
    """
    master_records = [records[0]]
    for record in records[1:]:
        merged = False
        # Deliberately no early exit: ``merge`` is attempted on every master
        # that compares equal to the record.
        for master_record in master_records:
            if master_record != record:
                continue
            if master_record.merge(record):
                merged = True
        if not merged:
            master_records.append(record)
    return master_records
def main():
    """Merge every VCF file listed in ``options.input_vcf`` into ``options.output_vcf``.

    Each input is bgzip-compressed and tabix-indexed. The first input provides the header template, extended with
    the FREQ format, the SFREQ / SDP infos and any definition present in the other inputs. For every
    (chromosome number, position) found in any input, the records of all inputs are fetched, merged when they
    compare equal and ``merge`` succeeds, and written to the merged file and to its ``_snps.vcf`` or ``_indels.vcf``
    companion file.

    Reads the module-level ``options``; returns nothing.
    """
    global options, args

    # Be sure to get files bgzipped and tabix indexed
    for vcf_file in options.input_vcf:
        gz_path = vcf_file + ".gz"
        # Commands are passed as argument lists (no shell), so unusual file names cannot break or alter them.
        bgzip_cmd = ["bgzip", "-c", vcf_file]
        with open(gz_path, 'wb') as gz_out:
            bgzip = subprocess.Popen(bgzip_cmd, stdout=gz_out, stderr=subprocess.PIPE)
            bgzip_messages = bgzip.communicate()[1]
        if bgzip.returncode:
            raise subprocess.CalledProcessError(bgzip.returncode, bgzip_cmd, output=bgzip_messages)
        print(bgzip_messages)
        tabix_output = subprocess.check_output(["tabix", "-f", "-p", "vcf", gz_path], stderr=subprocess.STDOUT)
        print(tabix_output)

    # Create a list of sorted variant-sites containing chr and position (a set keeps the duplicate check O(1))
    unique_sites = set()
    for vcf_file in options.input_vcf:
        with open(vcf_file, 'r') as plain_handle:
            for record in vcf.Reader(plain_handle):
                unique_sites.add((get_chromosome_number(record.CHROM), record.POS))
    variant_sites = sorted(unique_sites)

    handles = []  # every file opened from here on; closed in the ``finally`` below
    try:
        # First vcf file will be the template file for outputting parameters
        handles.append(open(options.input_vcf[0], 'r'))
        template_vcf = vcf.Reader(handles[-1])

        # Add essential fields in both formats and infos (header information)
        template_vcf.formats['FREQ'] = VcfFormat('FREQ', 1, 'String', 'Variant allele frequency')
        template_vcf.infos['SFREQ'] = VcfInfo('SFREQ', 1, 'Float', 'Maximum variant allele frequency of all samples')
        template_vcf.infos['SDP'] = VcfInfo('SDP', 1, 'Integer', 'Maximum sequencing depth of all samples')

        # Open all files for random access
        input_vcf = []
        for index, vcf_file in enumerate(options.input_vcf):
            handles.append(open(vcf_file + '.gz', 'r'))
            current = vcf.Reader(handles[-1])
            input_vcf.append(current)
            # Perform tests and checks
            if index > 0 and current.samples != template_vcf.samples:
                print("INFO: not same sample list in %s" % vcf_file)

            # Add necessary FORMAT or INFO fields definitions in template
            for info in current.infos:
                if info not in template_vcf.infos:
                    template_vcf.infos[info] = current.infos[info]
            for myformat in current.formats:
                if myformat not in template_vcf.formats:
                    template_vcf.formats[myformat] = current.formats[myformat]

        # Open output handles
        handles.append(open(options.output_vcf, 'w'))
        output_vcf = vcf.Writer(handles[-1], template_vcf, lineterminator='\n')
        handles.append(open(options.output_vcf+'_indels.vcf', 'w'))
        output_indels_vcf = vcf.Writer(handles[-1], template_vcf, lineterminator='\n')
        handles.append(open(options.output_vcf+'_snps.vcf', 'w'))
        output_snps_vcf = vcf.Writer(handles[-1], template_vcf, lineterminator='\n')

        # Now parse each variant-site and fetch information from vcfs:
        for my_variant_site in variant_sites:
            records = []
            for my_vcf in input_vcf:
                try:
                    for record in my_vcf.fetch(chromosome_number2str(my_variant_site[0]), my_variant_site[1],
                                               my_variant_site[1]):
                        # vcf.fetch returns also next position if described, must be therefore removed
                        if record.POS == my_variant_site[1]:
                            records.append(record)
                except KeyError:
                    # This exception is raised when the primary key is not found in one of the files.
                    # No actions required
                    pass
            if not records:
                # No reader returned a record for this site; ``records[0]`` used to raise IndexError.
                continue

            # master_records are those records for being output to merged vcf. A master record will be created
            # for each group of variants from a same variant site that can be merged
            master_records = [records[0]]
            for record in records[1:]:
                already_added = False
                # Every master is visited (no early exit) so ``merge`` runs on each one that equals the record.
                for master_record in master_records:
                    if master_record != record:
                        continue
                    if master_record.merge(record):
                        already_added = True
                # master_records is never empty, so "merged nowhere" is the only case needing a new master.
                if not already_added:
                    master_records.append(record)

            for master_record in master_records:
                output_vcf.write_record(master_record)
                if master_record.is_snp:
                    output_snps_vcf.write_record(master_record)
                elif master_record.is_indel:
                    output_indels_vcf.write_record(master_record)
    finally:
        for handle in reversed(handles):
            handle.close()