def load_consequence(consequence=None):
    """Insert the chromosome, gene and transcript of one consequence record.

    Three POST requests are sent, in this order, to the ``rest/INSERT``
    endpoints for ``chromosome``, ``gene`` and ``transcript`` under the
    module-level ``uri``, authenticated with the module-level ``auth``.

    Args:
        consequence: mapping indexed with the keys ``Chr``, ``SYMBOL``,
            ``Gene``, ``Feature_ID`` and ``RefSeq_ID``; each entry is in turn
            indexed with ``"value"``. The ``None`` default is not handled
            and fails on the first lookup.

    Returns:
        True when the transcript request answers with status code 200,
        False otherwise. The status codes of the chromosome and gene
        requests are not inspected.
    """
    request_headers = {"Content-Type": "application/json"}

    def post_row(endpoint, row):
        # All inserts share the base URI, the headers and the credentials.
        return requests.post(uri + endpoint, headers=request_headers, auth=auth, data=json.dumps(row))

    # Read every field up front so that a missing key fails before any
    # request has been sent.
    chromosome_id = get_chromosome_number(consequence["Chr"]["value"])
    chromosome_label = chromosome_number2str(chromosome_id)
    gene_symbol = consequence["SYMBOL"]["value"]
    gene_ensembl = consequence["Gene"]["value"]
    transcript_ensembl = consequence["Feature_ID"]["value"]
    refseq_name = consequence["RefSeq_ID"]["value"]

    # Chromosome row; the response is not used.
    post_row("rest/INSERT/chromosome/X.json", {"id": chromosome_id, "name": chromosome_label})

    # Gene row.
    gene_response = post_row(
        "rest/INSERT/gene/X.json",
        {"chromosome_id": chromosome_id, "HGNC": gene_symbol, "ensembl_id": gene_ensembl},
    )
    # NOTE(review): a requests response normally exposes no ``id`` attribute;
    # the new row id probably has to be read from the response body. Confirm
    # against the service's reply format.
    gene_key = gene_response.id

    # Transcript row, linked to the gene inserted above.
    transcript_response = post_row(
        "rest/INSERT/transcript/X.json",
        {"genes_id": gene_key, "HGMD_name": refseq_name, "ensembl_id": transcript_ensembl},
    )
    return transcript_response.status_code == 200
def load_consequence(consequence=None):
    """Insert the chromosome, gene and transcript of one consequence record.

    Three POST requests are sent, in this order, to the ``rest/INSERT``
    endpoints for ``chromosome``, ``gene`` and ``transcript`` under the
    module-level ``uri``, authenticated with the module-level ``auth``.

    Args:
        consequence: mapping indexed with the keys ``Chr``, ``SYMBOL``,
            ``Gene``, ``Feature_ID`` and ``RefSeq_ID``; each entry is in turn
            indexed with ``"value"``. The ``None`` default is not handled
            and fails on the first lookup.

    Returns:
        True when the transcript request answers with status code 200,
        False otherwise. The status codes of the chromosome and gene
        requests are not inspected.
    """
    request_headers = {"Content-Type": "application/json"}

    def post_row(endpoint, row):
        # All inserts share the base URI, the headers and the credentials.
        return requests.post(uri + endpoint, headers=request_headers, auth=auth, data=json.dumps(row))

    # Read every field up front so that a missing key fails before any
    # request has been sent.
    chromosome_id = get_chromosome_number(consequence["Chr"]["value"])
    chromosome_label = chromosome_number2str(chromosome_id)
    gene_symbol = consequence["SYMBOL"]["value"]
    gene_ensembl = consequence["Gene"]["value"]
    transcript_ensembl = consequence["Feature_ID"]["value"]
    refseq_name = consequence["RefSeq_ID"]["value"]

    # Chromosome row; the response is not used.
    post_row("rest/INSERT/chromosome/X.json", {"id": chromosome_id, "name": chromosome_label})

    # Gene row.
    gene_response = post_row(
        "rest/INSERT/gene/X.json",
        {"chromosome_id": chromosome_id, "HGNC": gene_symbol, "ensembl_id": gene_ensembl},
    )
    # NOTE(review): a requests response normally exposes no ``id`` attribute;
    # the new row id probably has to be read from the response body. Confirm
    # against the service's reply format.
    gene_key = gene_response.id

    # Transcript row, linked to the gene inserted above.
    transcript_response = post_row(
        "rest/INSERT/transcript/X.json",
        {"genes_id": gene_key, "HGMD_name": refseq_name, "ensembl_id": transcript_ensembl},
    )
    return transcript_response.status_code == 200
def _bgzip_and_index(vcf_file):
    """Compress ``vcf_file`` to ``vcf_file + '.gz'`` with bgzip and index it with tabix.

    Both tools are started from argument lists instead of a shell command
    string, so paths containing spaces or shell metacharacters are passed
    through verbatim. Whatever each tool reports is printed.

    Raises:
        subprocess.CalledProcessError: if either tool exits with a non-zero
            status.
    """
    gz_path = vcf_file + ".gz"

    bgzip_cmd = ["bgzip", "-c", vcf_file]
    # "bgzip -c" emits the compressed data on stdout: send it straight into
    # the target file and collect stderr separately so it can be reported.
    with open(gz_path, "wb") as gz_handle:
        bgzip = subprocess.Popen(bgzip_cmd, stdout=gz_handle, stderr=subprocess.PIPE)
        bgzip_log = bgzip.communicate()[1]
    if bgzip.returncode != 0:
        raise subprocess.CalledProcessError(bgzip.returncode, bgzip_cmd, output=bgzip_log)
    print(bgzip_log)

    tabix_cmd = ["tabix", "-f", "-p", "vcf", gz_path]
    print(subprocess.check_output(tabix_cmd, stderr=subprocess.STDOUT))


def _collect_variant_sites(vcf_paths):
    """Return the sorted, de-duplicated (chromosome number, position) pairs of all files."""
    # A set makes de-duplication one lookup per record instead of a scan of
    # every site seen so far.
    sites = set()
    for vcf_path in vcf_paths:
        with open(vcf_path, 'r') as handle:
            for record in vcf.Reader(handle):
                sites.add((get_chromosome_number(record.CHROM), record.POS))
    return sorted(sites)


def _merge_site_records(records):
    """Group the records of one variant site into master records.

    The first record always becomes a master record. Every later record is
    merged into each master record it compares equal to; it becomes a new
    master record itself unless at least one of those merges succeeded.
    ``records`` must not be empty.
    """
    master_records = [records[0]]
    for record in records[1:]:
        add_to_master = False
        already_added = False
        for master_record in master_records:
            if master_record != record:
                add_to_master = True
            elif not master_record.merge(record):
                add_to_master = True
            else:
                already_added = True
        if add_to_master and not already_added:
            master_records.append(record)
    return master_records


def main():
    """Merge the VCF files listed in ``options.input_vcf`` into one output VCF.

    Steps:
      1. bgzip and tabix-index every input file.
      2. Use the first input file as the header template, adding the FREQ
         format and the SFREQ / SDP info definitions plus any definition
         found only in the other inputs.
      3. Collect every (chromosome, position) site present in any input.
      4. For each site, fetch the records of all inputs, merge the ones that
         can be merged and write the result to ``options.output_vcf``; SNPs
         are also written to ``options.output_vcf + '_snps.vcf'`` and indels
         to ``options.output_vcf + '_indels.vcf'``.

    Reads the module-level ``options``. Every file opened here is closed
    before returning, also when an error interrupts the merge.
    """
    global options, args

    # Be sure to get files bgzipped and tabix indexed
    for vcf_file in options.input_vcf:
        _bgzip_and_index(vcf_file)

    open_handles = []

    def _open(path, mode):
        # Remember every handle so that the finally clause can close it.
        handle = open(path, mode)
        open_handles.append(handle)
        return handle

    try:
        # First vcf file will be the template file for outputting parameters
        template_vcf = vcf.Reader(_open(options.input_vcf[0], 'r'))

        # Add essential fields in both formats and infos (header information)
        template_vcf.formats['FREQ'] = VcfFormat('FREQ', 1, 'String', 'Variant allele frequency')
        template_vcf.infos['SFREQ'] = VcfInfo('SFREQ', 1, 'Float', 'Maximum variant allele frequency of all samples')
        template_vcf.infos['SDP'] = VcfInfo('SDP', 1, 'Integer', 'Maximum sequencing depth of all samples')

        # Sorted variant-sites containing chr and position
        variant_sites = _collect_variant_sites(options.input_vcf)

        # Open all files for random access
        input_vcf = []
        for index, vcf_file in enumerate(options.input_vcf):
            # The bgzip output is binary data, hence binary mode.
            reader = vcf.Reader(_open(vcf_file + '.gz', 'rb'))
            input_vcf.append(reader)

            # Perform tests and checks
            if index > 0 and reader.samples != template_vcf.samples:
                print("INFO: not same sample list in %s" % vcf_file)

            # Add necessary FORMAT or INFO fields definitions in template
            for info in reader.infos:
                if info not in template_vcf.infos:
                    template_vcf.infos[info] = reader.infos[info]
            for myformat in reader.formats:
                if myformat not in template_vcf.formats:
                    template_vcf.formats[myformat] = reader.formats[myformat]

        # Open output handles
        output_vcf = vcf.Writer(_open(options.output_vcf, 'w'), template_vcf, lineterminator='\n')
        output_indels_vcf = vcf.Writer(_open(options.output_vcf + '_indels.vcf', 'w'), template_vcf,
                                       lineterminator='\n')
        output_snps_vcf = vcf.Writer(_open(options.output_vcf + '_snps.vcf', 'w'), template_vcf,
                                     lineterminator='\n')

        # Now parse each variant-site and fetch information from vcfs:
        for my_variant_site in variant_sites:
            records = []
            for my_vcf in input_vcf:
                try:
                    for record in my_vcf.fetch(chromosome_number2str(my_variant_site[0]),
                                               my_variant_site[1], my_variant_site[1]):
                        # vcf.fetch returns also next position if described, must be therefore removed
                        if record.POS == my_variant_site[1]:
                            records.append(record)
                except KeyError:
                    # This exception is raised when the primary key is not found in one of the files.
                    # No actions required
                    pass

            if not records:
                # Nothing could be fetched for this site from any file;
                # report it instead of failing on records[0].
                print("WARNING: no record fetched for site %s:%s" % (my_variant_site[0], my_variant_site[1]))
                continue

            # master_records are those records for being output to merged vcf. A master record is
            # created for each group of variants from a same variant site that can be merged
            for master_record in _merge_site_records(records):
                output_vcf.write_record(master_record)
                if master_record.is_snp:
                    output_snps_vcf.write_record(master_record)
                elif master_record.is_indel:
                    output_indels_vcf.write_record(master_record)
    finally:
        for handle in open_handles:
            handle.close()
def _bgzip_and_index(vcf_file):
    """Compress ``vcf_file`` to ``vcf_file + '.gz'`` with bgzip and index it with tabix.

    Both tools are started from argument lists instead of a shell command
    string, so paths containing spaces or shell metacharacters are passed
    through verbatim. Whatever each tool reports is printed.

    Raises:
        subprocess.CalledProcessError: if either tool exits with a non-zero
            status.
    """
    gz_path = vcf_file + ".gz"

    bgzip_cmd = ["bgzip", "-c", vcf_file]
    # "bgzip -c" emits the compressed data on stdout: send it straight into
    # the target file and collect stderr separately so it can be reported.
    with open(gz_path, "wb") as gz_handle:
        bgzip = subprocess.Popen(bgzip_cmd, stdout=gz_handle, stderr=subprocess.PIPE)
        bgzip_log = bgzip.communicate()[1]
    if bgzip.returncode != 0:
        raise subprocess.CalledProcessError(bgzip.returncode, bgzip_cmd, output=bgzip_log)
    print(bgzip_log)

    tabix_cmd = ["tabix", "-f", "-p", "vcf", gz_path]
    print(subprocess.check_output(tabix_cmd, stderr=subprocess.STDOUT))


def _collect_variant_sites(vcf_paths):
    """Return the sorted, de-duplicated (chromosome number, position) pairs of all files."""
    # A set makes de-duplication one lookup per record instead of a scan of
    # every site seen so far.
    sites = set()
    for vcf_path in vcf_paths:
        with open(vcf_path, 'r') as handle:
            for record in vcf.Reader(handle):
                sites.add((get_chromosome_number(record.CHROM), record.POS))
    return sorted(sites)


def _merge_site_records(records):
    """Group the records of one variant site into master records.

    The first record always becomes a master record. Every later record is
    merged into each master record it compares equal to; it becomes a new
    master record itself unless at least one of those merges succeeded.
    ``records`` must not be empty.
    """
    master_records = [records[0]]
    for record in records[1:]:
        add_to_master = False
        already_added = False
        for master_record in master_records:
            if master_record != record:
                add_to_master = True
            elif not master_record.merge(record):
                add_to_master = True
            else:
                already_added = True
        if add_to_master and not already_added:
            master_records.append(record)
    return master_records


def main():
    """Merge the VCF files listed in ``options.input_vcf`` into one output VCF.

    Steps:
      1. bgzip and tabix-index every input file.
      2. Use the first input file as the header template, adding the FREQ
         format and the SFREQ / SDP info definitions plus any definition
         found only in the other inputs.
      3. Collect every (chromosome, position) site present in any input.
      4. For each site, fetch the records of all inputs, merge the ones that
         can be merged and write the result to ``options.output_vcf``; SNPs
         are also written to ``options.output_vcf + '_snps.vcf'`` and indels
         to ``options.output_vcf + '_indels.vcf'``.

    Reads the module-level ``options``. Every file opened here is closed
    before returning, also when an error interrupts the merge.
    """
    global options, args

    # Be sure to get files bgzipped and tabix indexed
    for vcf_file in options.input_vcf:
        _bgzip_and_index(vcf_file)

    open_handles = []

    def _open(path, mode):
        # Remember every handle so that the finally clause can close it.
        handle = open(path, mode)
        open_handles.append(handle)
        return handle

    try:
        # First vcf file will be the template file for outputting parameters
        template_vcf = vcf.Reader(_open(options.input_vcf[0], 'r'))

        # Add essential fields in both formats and infos (header information)
        template_vcf.formats['FREQ'] = VcfFormat('FREQ', 1, 'String', 'Variant allele frequency')
        template_vcf.infos['SFREQ'] = VcfInfo('SFREQ', 1, 'Float', 'Maximum variant allele frequency of all samples')
        template_vcf.infos['SDP'] = VcfInfo('SDP', 1, 'Integer', 'Maximum sequencing depth of all samples')

        # Sorted variant-sites containing chr and position
        variant_sites = _collect_variant_sites(options.input_vcf)

        # Open all files for random access
        input_vcf = []
        for index, vcf_file in enumerate(options.input_vcf):
            # The bgzip output is binary data, hence binary mode.
            reader = vcf.Reader(_open(vcf_file + '.gz', 'rb'))
            input_vcf.append(reader)

            # Perform tests and checks
            if index > 0 and reader.samples != template_vcf.samples:
                print("INFO: not same sample list in %s" % vcf_file)

            # Add necessary FORMAT or INFO fields definitions in template
            for info in reader.infos:
                if info not in template_vcf.infos:
                    template_vcf.infos[info] = reader.infos[info]
            for myformat in reader.formats:
                if myformat not in template_vcf.formats:
                    template_vcf.formats[myformat] = reader.formats[myformat]

        # Open output handles
        output_vcf = vcf.Writer(_open(options.output_vcf, 'w'), template_vcf, lineterminator='\n')
        output_indels_vcf = vcf.Writer(_open(options.output_vcf + '_indels.vcf', 'w'), template_vcf,
                                       lineterminator='\n')
        output_snps_vcf = vcf.Writer(_open(options.output_vcf + '_snps.vcf', 'w'), template_vcf,
                                     lineterminator='\n')

        # Now parse each variant-site and fetch information from vcfs:
        for my_variant_site in variant_sites:
            records = []
            for my_vcf in input_vcf:
                try:
                    for record in my_vcf.fetch(chromosome_number2str(my_variant_site[0]),
                                               my_variant_site[1], my_variant_site[1]):
                        # vcf.fetch returns also next position if described, must be therefore removed
                        if record.POS == my_variant_site[1]:
                            records.append(record)
                except KeyError:
                    # This exception is raised when the primary key is not found in one of the files.
                    # No actions required
                    pass

            if not records:
                # Nothing could be fetched for this site from any file;
                # report it instead of failing on records[0].
                print("WARNING: no record fetched for site %s:%s" % (my_variant_site[0], my_variant_site[1]))
                continue

            # master_records are those records for being output to merged vcf. A master record is
            # created for each group of variants from a same variant site that can be merged
            for master_record in _merge_site_records(records):
                output_vcf.write_record(master_record)
                if master_record.is_snp:
                    output_snps_vcf.write_record(master_record)
                elif master_record.is_indel:
                    output_indels_vcf.write_record(master_record)
    finally:
        for handle in open_handles:
            handle.close()