def get_events(self, sample_name=CURRENT_SAMPLE):
    '''Build one summary record per event id for a sample and return them as a list.

    If sample_name differs from CURRENT_SAMPLE, the sample is loaded first via
    self.load_sample. Every event id in GROUPED_CURRENT_RECORDS is then turned
    into a dict with the keys 'id', 'type', 'vcf_id', 'description',
    'breakend locations', 'breakend locations array', 'breakends', 'fused'
    and 'genes hit'.

    Side effect: the module-level name json_records is rebound to a dict that
    maps each event id to its record.

    Returns a list holding the values of that dict.
    '''
    # TODO experimenting here
    global json_records

    if sample_name != CURRENT_SAMPLE:
        self.load_sample(sample_name)

    json_records = {}
    for event_id in GROUPED_CURRENT_RECORDS:
        event_type, sv_id = self.get_event_type(event_id)
        description = self.get_event_type_description(event_type)
        breakends = self.record_list_to_dict(GROUPED_CURRENT_RECORDS[event_id])

        # Human-readable "chrom:pos" labels. The 'chr' prefix is only used
        # when formatChromID yields exactly 'chr' + the raw chromosome name.
        # TODO fix hack
        location_labels = []
        for breakend in breakends:
            if (vcf_sv_specific_variables.formatChromID(breakend['CHROM'])
                    == 'chr' + breakend['CHROM']):
                shown_chrom = 'chr' + breakend['CHROM']
            else:
                shown_chrom = breakend['CHROM']
            location_labels.append('{0}:{1}'.format(shown_chrom, breakend['POS']))
        breakend_locations = ', '.join(location_labels)

        # Structured counterpart of the labels above, one dict per breakend.
        breakend_locations_array = []
        for breakend in breakends:
            breakend_locations_array.append({
                'chrom': breakend['CHROM'],
                'ucsc_chrom': vcf_sv_specific_variables.formatChromID(breakend['CHROM']),
                'pos': breakend['POS'],
                'sv_id': sv_id,
            })

        fused, genes_hit = self.any_fusion_in_event(event_id, sample_name)

        if fused:
            fused_marker = 'X'
        else:
            fused_marker = ''

        if len(genes_hit) > 0:
            genes_hit_text = ', '.join(genes_hit)
        else:
            genes_hit_text = ''

        json_records[event_id] = {
            'id': sv_id,
            'type': event_type,
            'vcf_id': event_id,
            'description': description,
            'breakend locations': breakend_locations,
            'breakend locations array': breakend_locations_array,
            'breakends': breakends,
            'fused': fused_marker,
            'genes hit': genes_hit_text,
        }

    # TODO simplify structure
    return list(json_records.values())
def get_chrom_size(self, chrom_id, species='human', vcf_type='meerkat'):
    '''Return the size of the given chromosome, or 0 if its id cannot be used.

    chrom_id is first passed through vcf_sv_specific_variables.formatChromID
    together with species and vcf_type. When that call gives a falsy result
    the size is reported as 0; otherwise the size is read from
    vcf_sv_specific_variables.chromosome_sizes[species] under the formatted id.
    '''
    formatted_id = vcf_sv_specific_variables.formatChromID(chrom_id, species, vcf_type)

    # No usable chromosome id: report a size of 0 without consulting the table.
    if not formatted_id:
        return 0

    return vcf_sv_specific_variables.chromosome_sizes[species][formatted_id]