예제 #1
0
def generate_hgnc(genes):
    """Generate lines for a file with reduced hgnc information

    The hgnc gene lines are fetched with fetch_hgnc(). The header line is
    yielded first, followed by every gene line whose hgnc id is found in
    ``genes``.

    Args:
        genes(dict): A dictionary with hgnc_id as key and hgnc_symbol as value.
                     The ids from the file are converted to int before the
                     lookup.

    Yields:
        print_line(str): Lines from the reduced file, with trailing whitespace
                         removed
    """
    LOG.info("Generating new hgnc reduced file")
    # fetch the latest hgnc file here
    hgnc_gene_lines = fetch_hgnc()

    header = None
    genes_found = 0

    # Loop over all hgnc gene lines
    for line in hgnc_gene_lines:
        line = line.rstrip()
        # Skip lines that are empty
        if not line:
            continue
        # The first non-empty line is the header. Testing the header itself
        # rather than the line index keeps this working when the fetched
        # lines start with one or more blank lines.
        if header is None:
            header = line.split('\t')
            yield line
            continue

        # Parse the hgnc gene line
        gene = parse_hgnc_line(line, header)
        if not gene:
            continue
        hgnc_id = int(gene['hgnc_id'])
        # Only keep genes that are part of the reduced set
        if hgnc_id in genes:
            genes_found += 1
            yield line

    LOG.info("Number of genes printed to file: %s", genes_found)
예제 #2
0
def generate_hgnc(genes):
    """Generate lines for a file with reduced hgnc information

    Gene lines come from fetch_hgnc(). The header is yielded as the first
    line; after that only lines for genes whose hgnc id exists in ``genes``
    are yielded.

    Args:
        genes(dict): A dictionary with hgnc_id as key and hgnc_symbol as value.
                     Membership is tested with the integer form of the id
                     found in each line.

    Yields:
        print_line(str): Lines from the reduced file, without trailing
                         whitespace
    """
    LOG.info("Generating new hgnc reduced file")
    # fetch the latest hgnc file here
    hgnc_gene_lines = fetch_hgnc()

    header = None
    genes_found = 0

    # Loop over all hgnc gene lines
    for raw_line in hgnc_gene_lines:
        line = raw_line.rstrip()
        # Skip lines that are empty
        if not line:
            continue
        # Treat the first non-empty line as the header. Relying on the line
        # index would leave the header unset if the input begins with a
        # blank line, and every following gene line would then be parsed
        # without a header.
        if header is None:
            header = line.split('\t')
            yield line
            continue

        # Parse the hgnc gene line
        gene = parse_hgnc_line(line, header)
        if not gene:
            continue
        hgnc_id = int(gene['hgnc_id'])
        # Check if the gene is in the reduced set
        if hgnc_id in genes:
            genes_found += 1
            yield line

    LOG.info("Number of genes printed to file: %s", genes_found)
예제 #3
0
def test_parse_hgnc_line(hgnc_handle):
    """Check that the parsed hgnc id matches the id in the raw gene line"""
    # The first line from the handle is used as the tab separated header
    column_names = next(hgnc_handle).split('\t')
    gene_line = next(hgnc_handle)

    parsed_gene = parse_hgnc_line(header=column_names, line=gene_line)
    parsed_id = parsed_gene['hgnc_id']

    # The first column holds the id as '<prefix>:<number>'
    id_field = gene_line.split('\t')[0]
    expected_id = int(id_field.split(':')[1])

    assert parsed_id == expected_id
예제 #4
0
def test_parse_hgnc_line(hgnc_handle):
    """Parse the first gene line and compare its hgnc id with the raw value"""
    header_line = next(hgnc_handle)
    raw_gene = next(hgnc_handle)

    result = parse_hgnc_line(header=header_line.split('\t'), line=raw_gene)
    found_id = result['hgnc_id']

    # Take the numeric part after the colon in the first tab separated field
    first_field = raw_gene.split('\t')[0]
    number_part = first_field.split(':')[1]

    assert found_id == int(number_part)
예제 #5
0
def test_parse_hgnc_line(hgnc_handle):
    """Verify that parse_hgnc_line returns the integer hgnc id of a gene line"""
    # Consume the header line first, then the first gene line
    fields = next(hgnc_handle).split("\t")
    gene_row = next(hgnc_handle)

    info = parse_hgnc_line(header=fields, line=gene_row)
    actual = info["hgnc_id"]

    # The id column is the first one and its number follows the colon
    raw_id = gene_row.split("\t")[0]
    wanted = int(raw_id.split(":")[1])

    assert actual == wanted