def parse_whitelist_file(whitelist_file, n_mismatch, n_repeat):
    """Build per-segment whitelist lookups from a one-column barcode file.

    Args:
        whitelist_file: path handed to ``utils.read_one_col``.
        n_mismatch: forwarded unchanged to ``get_mismatch_dict``.
        n_repeat: how many times each lookup is repeated in the result.

    Returns:
        A pair ``(barcode_set_list, barcode_mismatch_list)``. Both lists have
        ``n_repeat`` entries, and every entry of a list is the *same* object
        (one shared set, one shared mismatch dict), not a copy.
    """
    whitelist, _ = utils.read_one_col(whitelist_file)
    whitelist_set = set(whitelist)
    mismatch_lookup = get_mismatch_dict(whitelist, n_mismatch)

    # List repetition stores n_repeat references to the single set / dict.
    return [whitelist_set] * n_repeat, [mismatch_lookup] * n_repeat
def __init__(self, args, display_title=None):
    """Initialise the step: load barcodes and target genes, record metrics.

    Args:
        args: parsed options; this method reads ``match_dir``, ``panel`` and
            ``gene_list`` from it.
        display_title: forwarded to ``Step.__init__``.

    Exits the process via ``sys.exit`` when the resulting gene list is empty.
    """
    Step.__init__(self, args, display_title=display_title)

    # Barcodes read from the match directory, kept both as list and as set.
    barcodes, n_cell = utils.read_barcode_file(args.match_dir)
    self.match_barcode_list = barcodes
    self.n_cell = n_cell
    self.match_barcode = set(barcodes)

    # Target genes: the panel BED takes precedence over a plain gene-list file.
    if not args.panel:
        self.gene_list, self.n_gene = utils.read_one_col(args.gene_list)
    else:
        self.gene_list = utils.get_gene_region_from_bed(args.panel)[0]
        self.n_gene = len(self.gene_list)

    if not self.gene_list:
        sys.exit("You must provide either --panel or --gene_list!")

    # Three-level nested counter with int leaves, created by utils.genDict.
    self.count_dict = utils.genDict(dim=3, valType=int)

    summary_metrics = (
        ("Number of Target Genes", self.n_gene),
        ("Number of Cells", self.n_cell),
    )
    for metric_name, metric_value in summary_metrics:
        self.add_metric(name=metric_name, value=metric_value)

    # Output BAM paths derived from the step's output prefix.
    self.out_bam_file = f'{self.out_prefix}_filtered.bam'
    self.out_bam_file_sorted = f'{self.out_prefix}_filtered_sorted.bam'
def setUp(self):
    """Prepare the fixture: switch to the test data directory and load inputs.

    All paths are hard-coded; relative paths below are resolved against the
    directory entered by ``os.chdir``, so that call must stay first.
    """
    os.chdir('/SGRNJ01/RD_dir/pipeline_test/zhouyiqi/0910_panel/')

    # Plain configuration values (no I/O).
    self.sample = 'S20071508_D_TS'
    self.assay = 'capture_rna'
    self.match_dir = '/SGRNJ02/RandD4/RD20051303_Panel/20200729/S20071508_D_ZL'
    self.genomeDir = '/SGRNJ/Public/Database/genome/homo_sapiens/ensembl_92'
    self.outdir = f'{self.sample}/05.count_capture_rna/'

    # Count-detail table produced by the count step.
    count_detail_file = './/S20071508_D_TS/05.count_capture_rna/S20071508_D_TS_count_detail.txt'
    self.df = pd.read_table(count_detail_file, header=0)

    # Barcodes from the matched sample directory.
    matched = read_barcode_file(self.match_dir)
    self.sc_cell_barcodes, self.sc_cell_number = matched

    # Barcodes listed in the 10X-style matrix directory.
    validated = read_one_col(f'{self.sample}/05.count_capture_rna/{self.sample}_matrix_10X/barcodes.tsv')
    self.validated_barcodes = validated[0]

    # Only the GTF path (second returned item) is kept from the genome lookup.
    genome_files = glob_genomeDir(self.genomeDir)
    self.gtf = genome_files[1]
def convert(gene_list_file, gtf):
    """Return a ``{gene_id: gene_name}`` dict for the genes listed in a file.

    Args:
        gene_list_file: one-column file of gene names, read with
            ``read_one_col``.
        gtf: passed to ``gene_convert``, whose result is used here as a
            mapping from gene id to gene name.

    Returns:
        Dict keyed by gene id with the listed gene name as value.

    Raises:
        KeyError: if a listed gene name is absent from the mapping.
    """
    listed_names, _count = read_one_col(gene_list_file)
    id_to_name = gene_convert(gtf)

    # Invert id -> name; when several ids share a name the last one wins.
    name_to_id = {id_to_name[gene_id]: gene_id for gene_id in id_to_name}

    return {name_to_id[gene_name]: gene_name for gene_name in listed_names}
def __init__(self, args):
    """Collect inputs and output paths, and make sure the output dir exists.

    Args:
        args: parsed options; this method reads ``match_dir``,
            ``barcode_file`` and ``outdir`` from it.

    Raises:
        IndexError: if no file matches ``{match_dir}/*barcode/*_2.fq*``.
        OSError: if the output directory cannot be created.
    """
    self.args = args

    # First file matching the read-2 fastq pattern under the match directory.
    self.rna_fq_file = glob.glob(f'{args.match_dir}/*barcode/*_2.fq*')[0]
    self.barcodes, _num = utils.read_one_col(args.barcode_file)
    self.read_index = set()

    # out
    self.out_fq1 = f'{args.outdir}/{SAMPLE}_1.fq.gz'
    self.out_fq2 = f'{args.outdir}/{SAMPLE}_2.fq.gz'

    # Create the directory without going through a shell: a path containing
    # spaces or shell metacharacters is handled correctly, and a failure
    # raises instead of being silently ignored.
    os.makedirs(args.outdir, exist_ok=True)