def generate_hgnc(genes):
    """Generate lines from a file with reduced hgnc information

    The header line is always yielded first. After that, a gene line is
    yielded only when the hgnc id parsed from it is found in ``genes``.

    Args:
        genes(dict): A dictionary with hgnc_id as key and hgnc_symbol as value.
                     Membership is tested with the id converted to ``int``.

    Yields:
        print_line(str): Lines from the reduced file, stripped of trailing
                         whitespace
    """
    LOG.info("Generating new hgnc reduced file")
    # fetch the latest hgnc file here
    hgnc_gene_lines = fetch_hgnc()

    header = None
    genes_found = 0

    # Loop over all hgnc gene lines
    for line in hgnc_gene_lines:
        line = line.rstrip()
        # Skip lines that are empty
        if not line:
            continue

        # The first non-empty line is the header. Keying on "header is None"
        # instead of the line index keeps this correct when the input starts
        # with blank lines, which would otherwise leave the header unset.
        if header is None:
            header = line.split('\t')
            yield line
            continue

        # Parse the hgnc gene line
        gene = parse_hgnc_line(line, header)
        if not gene:
            continue

        hgnc_id = int(gene['hgnc_id'])
        # Check if the gene is in the reduced set
        if hgnc_id in genes:
            genes_found += 1
            yield line

    LOG.info("Number of genes printed to file: %s", genes_found)
def generate_hgnc(genes):
    """Generate lines from a file with reduced hgnc information

    Yields the header line, followed by each gene line whose parsed hgnc id
    is a key in ``genes``.

    Args:
        genes(dict): A dictionary with hgnc_id as key and hgnc_symbol as value.
                     Ids are compared as ``int``.

    Yields:
        print_line(str): Lines from the reduced file, with trailing
                         whitespace removed
    """
    LOG.info("Generating new hgnc reduced file")
    # fetch the latest hgnc file here
    hgnc_gene_lines = fetch_hgnc()

    header = None
    genes_found = 0

    # Loop over all hgnc gene lines
    for raw_line in hgnc_gene_lines:
        line = raw_line.rstrip()
        # Skip lines that are empty
        if not line:
            continue

        # Treat the first non-empty line as the header rather than relying on
        # the position in the input; leading blank lines would otherwise
        # cause the header to be parsed as a gene with no column names.
        if header is None:
            header = line.split('\t')
            yield line
            continue

        # Parse the hgnc gene line; lines that give no gene are skipped
        gene = parse_hgnc_line(line, header)
        if not gene:
            continue

        # Only keep genes that are part of the reduced set
        if int(gene['hgnc_id']) in genes:
            genes_found += 1
            yield line

    LOG.info("Number of genes printed to file: %s", genes_found)
def test_parse_hgnc_line(hgnc_handle):
    """Check that parse_hgnc_line returns the numeric hgnc id of a gene line"""
    # The first line from the handle is used as the tab separated header
    header_line = next(hgnc_handle)
    columns = header_line.split('\t')
    # The following line is the gene entry under test
    gene_line = next(hgnc_handle)

    parsed_gene = parse_hgnc_line(header=columns, line=gene_line)
    parsed_id = parsed_gene['hgnc_id']

    # The first column has the form "<prefix>:<number>"; the parsed id is
    # expected to equal the number after the colon
    id_column = gene_line.split('\t')[0]
    expected_id = int(id_column.split(':')[1])

    assert parsed_id == expected_id
def test_parse_hgnc_line(hgnc_handle):
    """Parsing a hgnc gene line should give back its hgnc id as an integer"""
    # Consume the header line, then the first gene line, from the handle
    column_names = next(hgnc_handle).split("\t")
    gene_line = next(hgnc_handle)

    result = parse_hgnc_line(header=column_names, line=gene_line)
    observed = result["hgnc_id"]

    # Take the first tab separated field and keep the part after the colon
    first_field = gene_line.split("\t")[0]
    _, number = first_field.split(":")[0], first_field.split(":")[1]

    assert observed == int(number)