def get_sample_ids(vcf_path, provided_t_name=None, provided_n_name=None, provided_r_name=None, return_names=False):
    """Resolve the tumor, normal and (optionally) RNA samples of a VCF.

    Sample names may be supplied explicitly as comma-separated strings; any
    category that is not supplied is filled in from ``guess_sample_names`` or,
    for the normal, from the VCF samples that are neither tumor nor RNA.

    Args:
        vcf_path: Path of the VCF file, passed to ``cyvcf2.VCF``.
        provided_t_name: Comma-separated tumor sample name(s), or None/empty.
        provided_n_name: Comma-separated normal sample name(s), or None/empty.
        provided_r_name: Comma-separated RNA sample name(s), or None/empty.
        return_names: When true, return sample names; otherwise return the
            positions of the samples in the VCF's sample list.

    Returns:
        A tuple ``(tumor, normal)``, extended to ``(tumor, normal, rna)`` when
        RNA names were provided. Each element is a single name/index when the
        category holds exactly one sample, otherwise a list (possibly empty,
        e.g. the normal of a single-sample VCF).

    Raises:
        AssertionError: A provided name is not among the VCF samples, or a
            resolved name cannot be found in the VCF.
    """
    # Function-local import, kept from the original implementation.
    from cyvcf2 import VCF

    vcf_samples = VCF(vcf_path).samples

    t_names = _validated_sample_names(provided_t_name, 'Tumor', vcf_samples, vcf_path)
    n_names = _validated_sample_names(provided_n_name, 'Normal', vcf_samples, vcf_path)
    r_names = _validated_sample_names(provided_r_name, 'RNA', vcf_samples, vcf_path)

    if len(vcf_samples) == 1:
        # A single-sample VCF: its only sample is taken as the tumor,
        # regardless of any provided tumor name.
        t_names = [vcf_samples[0]]
    else:
        guessed_t_name, guessed_n_name = guess_sample_names(vcf_path)

        if not t_names:
            if not guessed_t_name:
                # NOTE(review): `critical` is defined elsewhere; presumably it
                # aborts the program - confirm.
                critical(f"Can't guess tumor sample name from the VCF {vcf_path}")
            t_names = [guessed_t_name]

        if not n_names:
            if guessed_n_name:
                n_names = [guessed_n_name]
            else:
                # t_names is always non-empty at this point, so every sample
                # that is neither tumor nor RNA is considered a normal.
                n_names = [
                    s for s in vcf_samples
                    if s not in t_names and s not in r_names
                ]
                if not n_names:
                    critical(f"Can't guess normal sample name from the VCF {vcf_path}")

    t_ids = _sample_indices(t_names, vcf_samples, 't_names')
    n_ids = _sample_indices(n_names, vcf_samples, 'n_names')
    r_ids = _sample_indices(r_names, vcf_samples, 'r_names')

    if return_names:
        tumor, normal, rna = t_names, n_names, r_names
    else:
        tumor, normal, rna = t_ids, n_ids, r_ids

    ret = (_unwrap_single(tumor), _unwrap_single(normal))
    if r_names:
        # The RNA element is only appended when RNA samples were requested.
        ret += (_unwrap_single(rna),)
    return ret


def _validated_sample_names(provided, kind, vcf_samples, vcf_path):
    """Split a comma-separated name string and check each name is in the VCF."""
    if not provided:
        return []
    names = provided.split(',')
    for sname in names:
        if sname not in vcf_samples:
            # Raised explicitly (rather than via `assert`) so the validation
            # is not stripped under `python -O`; the exception type is kept.
            raise AssertionError(
                f'{kind} sample name {sname} is not in VCF {vcf_path}. Found: {vcf_samples}')
    return names


def _sample_indices(names, vcf_samples, label):
    """Return the position of every name in vcf_samples ([] for no names)."""
    if not names:
        return []
    if any(name not in vcf_samples for name in names):
        raise AssertionError(f'{label}: {names}, vcf_samples: {vcf_samples}')
    return [vcf_samples.index(name) for name in names]


def _unwrap_single(items):
    """Return the sole element of a one-item list, otherwise the list itself."""
    return items[0] if len(items) == 1 else items