コード例 #1
0
def generate_mim2genes(genes, api_key):
    """Yield the mim2gene lines that belong to a given set of genes

    The mim2genes lines are fetched with fetch_mim_files. The leading lines
    that start with '#' are yielded unchanged, followed by the raw line of
    every entry from parse_mim2gene whose hgnc symbol is found in genes.

    Args:
        genes(dict): A dictionary with hgnc_symbol as key and hgnc_id as value
        api_key(str): Passed on to fetch_mim_files

    Yields:
        print_line(str): Lines from the reduced file
    """
    lines = fetch_mim_files(api_key, mim2genes=True)['mim2genes']

    # Pass the leading comment lines through, stop at the first other line
    for header_line in lines:
        if not header_line.startswith('#'):
            break
        yield header_line

    for entry in parse_mim2gene(lines):
        symbol = entry.get('hgnc_symbol')
        # Entries without a symbol, or with a symbol not asked for, are left out
        if symbol and symbol in genes:
            yield entry['raw']
コード例 #2
0
ファイル: disease.py プロジェクト: hassanfa/scout
def diseases(context, api_key):
    """
    Update disease terms in mongo database.

    The api key is taken from the argument or, if that is empty, from
    'omim_api_key' in context.obj. When no key is found, or when fetching
    the genemap2 file fails, a warning is logged and context.abort() is
    called. Otherwise the disease term collection is dropped and loaded
    again from the fetched genemap2 lines.
    """
    adapter = context.obj['adapter']

    # The command line key takes precedence over the configured one
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning(
            "Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    # The old terms are removed only after the fetch has been attempted
    LOG.info("Dropping DiseaseTerms")
    adapter.disease_term_collection.drop()
    LOG.debug("DiseaseTerms dropped")

    genemap_lines = mim_files['genemap2']
    load_disease_terms(adapter=adapter, genemap_lines=genemap_lines)

    LOG.info("Successfully loaded all disease terms")
コード例 #3
0
def generate_mim2genes(genes, api_key):
    """Yield the mim2gene lines that belong to a given set of genes

    The mim2genes lines are fetched with fetch_mim_files. The leading lines
    that start with '#' are yielded unchanged, followed by the raw line of
    every entry from parse_mim2gene whose hgnc symbol is found in genes.

    Args:
        genes(dict): A dictionary with hgnc_symbol as key and hgnc_id as value
        api_key(str): Passed on to fetch_mim_files

    Yields:
        print_line(str): Lines from the reduced file
    """
    lines = fetch_mim_files(api_key, mim2genes=True)['mim2genes']

    # Pass the leading comment lines through, stop at the first other line
    for header_line in lines:
        if not header_line.startswith('#'):
            break
        yield header_line

    for entry in parse_mim2gene(lines):
        symbol = entry.get('hgnc_symbol')
        # Entries without a symbol, or with a symbol not asked for, are left out
        if symbol and symbol in genes:
            yield entry['raw']
コード例 #4
0
def diseases(api_key):
    """
    Update disease terms in mongo database.

    The api key is taken from the argument or, if that is empty, from
    'OMIM_API_KEY' in the app config. When no key is found, or when fetching
    the genemap2 file fails, a warning is logged and click.Abort is raised.
    Otherwise the disease term collection is dropped and loaded again from
    the fetched genemap2 lines.
    """
    adapter = store

    # The command line key takes precedence over the configured one
    omim_key = api_key or current_app.config.get('OMIM_API_KEY')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        raise click.Abort()

    try:
        mim_files = fetch_mim_files(omim_key, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        raise click.Abort()

    # The old terms are removed only after the fetch has succeeded
    LOG.info("Dropping DiseaseTerms")
    adapter.disease_term_collection.drop()
    LOG.debug("DiseaseTerms dropped")

    genemap_lines = mim_files['genemap2']
    load_disease_terms(adapter=adapter, genemap_lines=genemap_lines)

    LOG.info("Successfully loaded all disease terms")
コード例 #5
0
ファイル: genes.py プロジェクト: CHRUdeLille/scout
def genes(context, build, api_key):
    """
    Load the hgnc aliases to the mongo database.

    The omim files are fetched first; without an api key, or if the fetch
    fails, a warning is logged and context.abort() is called. After that
    the genes are dropped and, for every build, the hgnc, ensembl and exac
    files are opened, linked together with the omim and hpo information and
    loaded. If build is empty both '37' and '38' are loaded.
    """
    adapter = context.obj['adapter']

    # The command line key takes precedence over the configured one
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    for genome_build in builds:
        # Fresh handles are opened for every build
        LOG.info("Loading hgnc file from {0}".format(hgnc_path))
        hgnc_handle = get_file_handle(hgnc_path)

        if genome_build == '37':
            ensembl_handle = get_file_handle(transcripts37_path)
        elif genome_build == '38':
            ensembl_handle = get_file_handle(transcripts38_path)
        else:
            # No ensembl file is known for any other build
            ensembl_handle = None

        LOG.info("Loading exac gene file from {0}".format(exac_path))
        exac_handle = get_file_handle(exac_path)

        linked_genes = link_genes(
            ensembl_lines=ensembl_handle,
            hgnc_lines=hgnc_handle,
            exac_lines=exac_handle,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes
        )

        load_hgnc_genes(adapter=adapter, genes=linked_genes, build=genome_build)
コード例 #6
0
ファイル: genes.py プロジェクト: tapaswenipathak/scout
def genes(build, api_key):
    """
    Load the hgnc aliases to the mongo database.

    The omim files are fetched first; without an api key, or if the fetch
    fails, a warning is logged and click.Abort is raised. After that genes
    and transcripts are dropped and loaded again for every build. If build
    is empty both '37' and '38' are loaded. The indexes are updated at the
    end.
    """
    LOG.info("Running scout update genes")
    adapter = store

    # The command line key takes precedence over the configured one
    omim_key = api_key or current_app.config.get('OMIM_API_KEY')
    if not omim_key:
        LOG.warning(
            "Please provide a omim api key to load the omim gene panel")
        raise click.Abort()

    try:
        mim_files = fetch_mim_files(omim_key,
                                    mim2genes=True,
                                    morbidmap=True,
                                    genemap2=True)
    except Exception as err:
        LOG.warning(err)
        raise click.Abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    # These resources are fetched once and shared between the builds
    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    for genome_build in builds:
        ensembl_lines = fetch_ensembl_genes(build=genome_build)

        # load the genes
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
            build=genome_build,
        )

        # Map every loaded gene object by its ensembl id
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        # Fetch the transcripts from ensembl
        ensembl_transcripts = fetch_ensembl_transcripts(build=genome_build)

        load_transcripts(adapter, ensembl_transcripts, genome_build,
                         genes_by_ensembl_id)

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
コード例 #7
0
    def load_omim_panel(self, api_key, institute=None):
        """Create and load the OMIM-AUTO panel

        The genemap2 and mim2genes files are fetched and a new OMIM-AUTO
        panel is built from them. Nothing is added when an existing panel
        has the same date as the fetched files, or when the comparison with
        the existing panel finds no new genes.

        Args:
            api_key(str): Passed on to fetch_mim_files
            institute(str): Institute of the panel, 'cust002' if not given
        """
        current_panel = self.gene_panel(panel_id='OMIM-AUTO')
        if current_panel:
            # Step up to the next whole version number
            version = float(math.floor(current_panel['version']) + 1)
        else:
            LOG.warning("OMIM-AUTO does not exists in database")
            LOG.info('Creating a first version')
            version = 1.0

        LOG.info("Setting version to %s", version)

        mim_files = fetch_mim_files(api_key=api_key, genemap2=True, mim2genes=True)

        # Get the date when the omim files were released, the last line
        # that mentions 'Generated' wins
        date_string = None
        for line in mim_files['genemap2']:
            if 'Generated' not in line:
                continue
            date_string = line.split(':')[-1].strip()
        date_obj = get_date(date_string)

        if current_panel and current_panel['date'] == date_obj:
            LOG.warning("There is no new version of OMIM")
            return

        panel_data = {
            'path': None,
            'type': 'clinical',
            'date': date_obj,
            'panel_id': 'OMIM-AUTO',
            'institute': institute or 'cust002',
            'version': version,
            'display_name': 'OMIM-AUTO',
            'genes': [],
        }

        alias_genes = self.genes_by_alias()

        omim_genes = get_omim_panel_genes(
            genemap2_lines=mim_files['genemap2'],
            mim2gene_lines=mim_files['mim2genes'],
            alias_genes=alias_genes,
        )
        panel_data['genes'].extend(omim_genes)

        panel_obj = build_panel(panel_data, self)

        if current_panel:
            new_genes = self.compare_mim_panels(current_panel, panel_obj)
            if not new_genes:
                LOG.info("The new version of omim does not differ from the old one")
                LOG.info("No update is added")
                return
            self.update_mim_version(new_genes, panel_obj, old_version=current_panel['version'])

        self.add_gene_panel(panel_obj)
コード例 #8
0
def setup_scout(adapter,
                institute_id='cust000',
                user_name='Clark Kent',
                user_mail='*****@*****.**',
                api_key=None,
                demo=False):
    """Wipe the database and set up a scout instance from scratch

    All collections whose name does not start with 'system' are dropped.
    Then an institute and an admin user are added, genes and transcripts
    are loaded for builds 37 and 38 and the hpo information is loaded.
    In demo mode the reduced resource files are read instead of fetching
    the resources, and a test panel and a case are loaded as well.

    Args:
        adapter: Database adapter that everything is loaded through
        institute_id(str): Internal id and display name of the institute
        user_name(str): Name of the admin user
        user_mail(str): Id and email of the user, also the sanger recipient
        api_key(str): Passed on to fetch_mim_files, only used when not demo
        demo(bool): If True, set up a demo instance from the reduced files

    Raises:
        Exception: Whatever fetch_mim_files raises is logged and re-raised
    """
    ########################## Delete previous information ##########################
    LOG.info("Deleting previous database")
    for collection_name in adapter.db.collection_names():
        if not collection_name.startswith('system'):
            LOG.info("Deleting collection %s", collection_name)
            adapter.db.drop_collection(collection_name)
    LOG.info("Database deleted")

    ########################## Add a institute ##########################
    #####################################################################
    # Build a institute with id institute_name
    institute_obj = build_institute(internal_id=institute_id,
                                    display_name=institute_id,
                                    sanger_recipients=[user_mail])

    # Add the institute to database
    adapter.add_institute(institute_obj)

    ########################## Add a User ###############################
    #####################################################################
    # Build a user obj
    user_obj = dict(_id=user_mail,
                    email=user_mail,
                    name=user_name,
                    roles=['admin'],
                    institutes=[institute_id])

    adapter.add_user(user_obj)

    ### Get the mim information ###

    if not demo:
        # Fetch the mim files
        try:
            mim_files = fetch_mim_files(api_key,
                                        mim2genes=True,
                                        morbidmap=True,
                                        genemap2=True)
        except Exception as err:
            LOG.warning(err)
            # There is no click context in this function, so the error is
            # handed back to the caller instead of calling context.abort()
            raise
        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']

        # Fetch the genes to hpo information
        hpo_gene_lines = fetch_hpo_genes()
        # Fetch the latest version of the hgnc information
        hgnc_lines = fetch_hgnc()
        # Fetch the latest exac pli score information
        exac_lines = fetch_exac_constraint()

    else:
        # The lines are read into lists since they are used once per build
        mim2gene_lines = [
            line for line in get_file_handle(mim2gene_reduced_path)
        ]
        genemap_lines = [
            line for line in get_file_handle(genemap2_reduced_path)
        ]

        # Fetch the genes to hpo information
        hpo_gene_lines = [
            line for line in get_file_handle(hpogenes_reduced_path)
        ]
        # Fetch the reduced hgnc information
        hgnc_lines = [line for line in get_file_handle(hgnc_reduced_path)]
        # Fetch the reduced exac pli score information
        exac_lines = [line for line in get_file_handle(exac_reduced_path)]

    builds = ['37', '38']
    ################## Load Genes and transcripts #######################
    #####################################################################
    for build in builds:
        # Fetch the ensembl information
        if not demo:
            ensembl_genes = fetch_ensembl_genes(build=build)
        else:
            # NOTE(review): the build 37 file is read for both builds in
            # demo mode - confirm that this is intended
            ensembl_genes = get_file_handle(genes37_reduced_path)
        # load the genes
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_genes,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Create a map from ensembl ids to gene objects
        ensembl_genes = {}
        for gene_obj in hgnc_genes:
            ensembl_id = gene_obj['ensembl_id']
            ensembl_genes[ensembl_id] = gene_obj

        # Fetch the transcripts from ensembl
        if not demo:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)
        else:
            ensembl_transcripts = get_file_handle(transcripts37_reduced_path)
        # Load the transcripts for a certain build
        load_transcripts(adapter, ensembl_transcripts, build, ensembl_genes)

    # Outside demo mode no hpo handles are passed on to load_hpo
    hpo_terms_handle = None
    hpo_to_genes_handle = None
    hpo_disease_handle = None
    if demo:
        hpo_terms_handle = get_file_handle(hpoterms_reduced_path)
        hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
        hpo_disease_handle = get_file_handle(
            hpo_phenotype_to_terms_reduced_path)

    load_hpo(adapter=adapter,
             hpo_lines=hpo_terms_handle,
             hpo_gene_lines=hpo_to_genes_handle,
             disease_lines=genemap_lines,
             hpo_disease_lines=hpo_disease_handle)

    # If demo we load a gene panel and some case information
    if demo:
        parsed_panel = parse_gene_panel(path=panel_path,
                                        institute='cust000',
                                        panel_id='panel1',
                                        version=1.0,
                                        display_name='Test panel')
        adapter.load_panel(parsed_panel)

        case_handle = get_file_handle(load_path)
        # yaml.load without a Loader is deprecated and fails on PyYAML 6,
        # safe_load works on every version
        case_data = yaml.safe_load(case_handle)

        adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
コード例 #9
0
ファイル: hgnc_gene.py プロジェクト: tapaswenipathak/scout
def load_hgnc_genes(adapter, genes=None, ensembl_lines=None, hgnc_lines=None, exac_lines=None,
                    mim2gene_lines=None, genemap_lines=None, hpo_lines=None, build='37',
                    omim_api_key=''):
    """Load genes into the database

    If no parsed genes are given, every resource that was not provided is
    fetched and link_genes merges them. Genes without a chromosome are
    skipped, the rest are built with build_hgnc_gene and loaded in bulk.

    Args:
        adapter(scout.adapter.MongoAdapter)
        genes(dict): If genes are already parsed
        ensembl_lines(iterable(str)): Lines formated with ensembl gene information
        hgnc_lines(iterable(str)): Lines with gene information from genenames.org
        exac_lines(iterable(str)): Lines with information pLi-scores from ExAC
        mim2gene_lines(iterable(str)): Lines with map from omim id to gene symbol
        genemap_lines(iterable(str)): Lines with information of omim entries
        hpo_lines(iterable(str)): Lines information about map from hpo terms to genes
        build(str): What build to use. Defaults to '37'
        omim_api_key(str): Used to fetch the omim files if mim2gene_lines
                           or genemap_lines is missing

    Returns:
        gene_objects(list): A list with all gene_objects that was loaded into database

    Raises:
        SyntaxError: If the omim files have to be fetched and no
                     omim_api_key was given
    """
    if not genes:
        # Fetch the resources if not provided
        if ensembl_lines is None:
            ensembl_lines = fetch_ensembl_genes(build=build)
        if not hgnc_lines:
            hgnc_lines = fetch_hgnc()
        if not exac_lines:
            exac_lines = fetch_exac_constraint()
        if not mim2gene_lines or not genemap_lines:
            # Both omim files are fetched again as soon as one is missing
            if not omim_api_key:
                raise SyntaxError("Need to provide omim api key")
            mim_files = fetch_mim_files(omim_api_key, mim2genes=True, genemap2=True)
            mim2gene_lines = mim_files['mim2genes']
            genemap_lines = mim_files['genemap2']
        if not hpo_lines:
            hpo_lines = fetch_hpo_files(hpogenes=True)['hpogenes']

        # Link the resources
        genes = link_genes(
            ensembl_lines=ensembl_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_lines
        )

    nr_genes = len(genes)
    gene_objects = []
    non_existing = 0

    with progressbar(genes.values(), label="Building genes", length=nr_genes) as bar:
        for gene_data in bar:
            if gene_data.get('chromosome'):
                gene_objects.append(build_hgnc_gene(gene_data, build=build))
            else:
                # Genes without a chromosome are counted and left out
                LOG.debug("skipping gene: %s. No coordinates found", gene_data.get('hgnc_symbol', '?'))
                non_existing += 1

    LOG.info("Loading genes build %s", build)
    adapter.load_hgnc_bulk(gene_objects)

    LOG.info("Loading done. %s genes loaded", len(gene_objects))
    LOG.info("Nr of genes without coordinates in build %s: %s", build,non_existing)

    return gene_objects
コード例 #10
0
ファイル: setup.py プロジェクト: Clinical-Genomics/scout
def setup_scout(adapter, institute_id='cust000', user_name='Clark Kent',
                user_mail='*****@*****.**', api_key=None, demo=False):
    """Wipe the database and set up a scout instance from scratch

    All collections whose name does not start with 'system' are dropped.
    Then an institute and an admin user are added, genes and transcripts
    are loaded for builds 37 and 38 and the hpo information is loaded.
    In demo mode the reduced resource files are read instead of fetching
    the resources, and a test panel and a case are loaded as well.

    Args:
        adapter: Database adapter that everything is loaded through
        institute_id(str): Internal id and display name of the institute
        user_name(str): Name of the admin user
        user_mail(str): Id and email of the user, also the sanger recipient
        api_key(str): Passed on to fetch_mim_files, only used when not demo
        demo(bool): If True, set up a demo instance from the reduced files

    Raises:
        Exception: Whatever fetch_mim_files raises is logged and re-raised
    """
    # Start from an empty database, the system collections are left alone
    LOG.info("Deleting previous database")
    for name in adapter.db.collection_names():
        if name.startswith('system'):
            continue
        LOG.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    LOG.info("Database deleted")

    # Add an institute that uses the id as display name as well
    adapter.add_institute(build_institute(
        internal_id=institute_id,
        display_name=institute_id,
        sanger_recipients=[user_mail],
    ))

    # Add an admin user that belongs to the new institute
    adapter.add_user({
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_id],
    })

    # Collect the gene resources, reduced files for demo, fetched otherwise
    if demo:
        # The lines are read into lists since they are used once per build
        mim2gene_lines = list(get_file_handle(mim2gene_reduced_path))
        genemap_lines = list(get_file_handle(genemap2_reduced_path))
        hpo_gene_lines = list(get_file_handle(hpogenes_reduced_path))
        hgnc_lines = list(get_file_handle(hgnc_reduced_path))
        exac_lines = list(get_file_handle(exac_reduced_path))
    else:
        try:
            mim_files = fetch_mim_files(api_key, mim2genes=True, morbidmap=True, genemap2=True)
        except Exception as err:
            LOG.warning(err)
            raise err
        mim2gene_lines = mim_files['mim2genes']
        genemap_lines = mim_files['genemap2']

        hpo_gene_lines = fetch_hpo_genes()
        hgnc_lines = fetch_hgnc()
        exac_lines = fetch_exac_constraint()

    # Load genes and transcripts for both builds
    for build in ['37', '38']:
        if demo:
            ensembl_lines = get_file_handle(genes37_reduced_path)
        else:
            ensembl_lines = fetch_ensembl_genes(build=build)

        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim2gene_lines,
            genemap_lines=genemap_lines,
            hpo_lines=hpo_gene_lines,
            build=build,
        )

        # Map every loaded gene object by its ensembl id
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        if demo:
            ensembl_transcripts = get_file_handle(transcripts37_reduced_path)
        else:
            ensembl_transcripts = fetch_ensembl_transcripts(build=build)

        load_transcripts(adapter, ensembl_transcripts, build, genes_by_ensembl_id)

    # Outside demo mode no hpo handles are passed on to load_hpo
    if demo:
        hpo_terms_handle = get_file_handle(hpoterms_reduced_path)
        hpo_to_genes_handle = get_file_handle(hpo_to_genes_reduced_path)
        hpo_disease_handle = get_file_handle(hpo_phenotype_to_terms_reduced_path)
    else:
        hpo_terms_handle = None
        hpo_to_genes_handle = None
        hpo_disease_handle = None

    load_hpo(
        adapter=adapter,
        hpo_lines=hpo_terms_handle,
        hpo_gene_lines=hpo_to_genes_handle,
        disease_lines=genemap_lines,
        hpo_disease_lines=hpo_disease_handle
    )

    # A demo instance also gets a gene panel and a case
    if demo:
        adapter.load_panel(parse_gene_panel(
            path=panel_path,
            institute='cust000',
            panel_id='panel1',
            version=1.0,
            display_name='Test panel'
        ))

        case_data = yaml.load(get_file_handle(load_path), Loader=yaml.FullLoader)
        adapter.load_case(case_data)

    LOG.info("Creating indexes")
    adapter.load_indexes()
    LOG.info("Scout instance setup successful")
コード例 #11
0
ファイル: setup_scout.py プロジェクト: CHRUdeLille/scout
def database(context, institute_name, user_name, user_mail, api_key):
    """Setup a scout database

    The omim files are fetched first; without an api key, or if the fetch
    fails, a warning is logged and context.abort() is called. After that
    all collections whose name does not start with 'system' are dropped,
    an institute and an admin user are added and genes are loaded for
    builds 37 and 38 followed by the hpo information. Arguments that are
    empty are looked up in context.obj.
    """
    LOG.info("Running scout setup database")

    # The command line key takes precedence over the configured one
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    institute_name = institute_name or context.obj['institute_name']
    user_name = user_name or context.obj['user_name']
    user_mail = user_mail or context.obj['user_mail']

    adapter = context.obj['adapter']

    # Start from an empty database, the system collections are left alone
    LOG.info("Deleting previous database")
    for name in adapter.db.collection_names():
        if name.startswith('system'):
            continue
        LOG.info("Deleting collection %s", name)
        adapter.db.drop_collection(name)
    LOG.info("Database deleted")

    LOG.info("Setting up database %s", context.obj['mongodb'])

    # Add an institute that uses the name as internal id as well
    adapter.add_institute(build_institute(
        internal_id=institute_name,
        display_name=institute_name,
        sanger_recipients=[user_mail]
    ))

    # Add an admin user that belongs to the new institute
    adapter.add_user({
        '_id': user_mail,
        'email': user_mail,
        'name': user_name,
        'roles': ['admin'],
        'institutes': [institute_name],
    })

    # Fetch the genes to hpo information
    hpo_genes = fetch_hpo_genes()

    # Link and load the genes of one build at a time, the hgnc and exac
    # files are opened again for every build
    ensembl_paths = (('37', transcripts37_path), ('38', transcripts38_path))
    for build, transcripts_path in ensembl_paths:
        linked_genes = link_genes(
            ensembl_lines=get_file_handle(transcripts_path),
            hgnc_lines=get_file_handle(hgnc_path),
            exac_lines=get_file_handle(exac_path),
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
        )
        load_hgnc_genes(adapter, linked_genes, build=build)

    load_hpo(
        adapter=adapter,
        disease_lines=mim_files['genemap2'],
    )

    LOG.info("Creating indexes")

    adapter.load_indexes()

    LOG.info("Scout instance setup successful")
コード例 #12
0
ファイル: genes.py プロジェクト: Clinical-Genomics/scout
def genes(context, build, api_key):
    """
    Load the hgnc aliases to the mongo database.

    The omim files are fetched first; without an api key, or if the fetch
    fails, a warning is logged and context.abort() is called. After that
    genes and transcripts are dropped and loaded again for every build. If
    build is empty both '37' and '38' are loaded. The indexes are updated
    at the end.
    """
    LOG.info("Running scout update genes")
    adapter = context.obj['adapter']

    # The command line key takes precedence over the configured one
    omim_key = api_key or context.obj.get('omim_api_key')
    if not omim_key:
        LOG.warning("Please provide a omim api key to load the omim gene panel")
        context.abort()

    try:
        mim_files = fetch_mim_files(omim_key, mim2genes=True, morbidmap=True, genemap2=True)
    except Exception as err:
        LOG.warning(err)
        context.abort()

    LOG.warning("Dropping all gene information")
    adapter.drop_genes(build)
    LOG.info("Genes dropped")
    LOG.warning("Dropping all transcript information")
    adapter.drop_transcripts(build)
    LOG.info("transcripts dropped")

    hpo_genes = fetch_hpo_genes()

    builds = [build] if build else ['37', '38']

    # These resources are fetched once and shared between the builds
    hgnc_lines = fetch_hgnc()
    exac_lines = fetch_exac_constraint()

    for genome_build in builds:
        ensembl_lines = fetch_ensembl_genes(build=genome_build)

        # load the genes
        hgnc_genes = load_hgnc_genes(
            adapter=adapter,
            ensembl_lines=ensembl_lines,
            hgnc_lines=hgnc_lines,
            exac_lines=exac_lines,
            mim2gene_lines=mim_files['mim2genes'],
            genemap_lines=mim_files['genemap2'],
            hpo_lines=hpo_genes,
            build=genome_build,
        )

        # Map every loaded gene object by its ensembl id
        genes_by_ensembl_id = {
            gene_obj['ensembl_id']: gene_obj for gene_obj in hgnc_genes
        }

        # Fetch the transcripts from ensembl
        ensembl_transcripts = fetch_ensembl_transcripts(build=genome_build)

        load_transcripts(adapter, ensembl_transcripts, genome_build, genes_by_ensembl_id)

    adapter.update_indexes()

    LOG.info("Genes, transcripts and Exons loaded")
コード例 #13
0
ファイル: panel.py プロジェクト: Clinical-Genomics/scout
    def load_omim_panel(self, api_key, institute=None):
        """Create and load the OMIM-AUTO panel

        The genemap2 and mim2genes files are fetched and a new OMIM-AUTO
        panel is built from them. Nothing is added when an existing panel
        has the same date as the fetched files, or when the comparison with
        the existing panel finds no new genes.

        Args:
            api_key(str): Passed on to fetch_mim_files
            institute(str): Institute of the panel, 'cust002' if not given
        """
        current_panel = self.gene_panel(panel_id='OMIM-AUTO')
        if current_panel:
            # Step up to the next whole version number
            version = float(math.floor(current_panel['version']) + 1)
        else:
            LOG.warning("OMIM-AUTO does not exists in database")
            LOG.info('Creating a first version')
            version = 1.0

        LOG.info("Setting version to %s", version)

        mim_files = fetch_mim_files(api_key=api_key, genemap2=True, mim2genes=True)

        # Get the date when the omim files were released, the last line
        # that mentions 'Generated' wins
        date_string = None
        for line in mim_files['genemap2']:
            if 'Generated' not in line:
                continue
            date_string = line.split(':')[-1].strip()
        date_obj = get_date(date_string)

        if current_panel and current_panel['date'] == date_obj:
            LOG.warning("There is no new version of OMIM")
            return

        panel_data = {
            'path': None,
            'type': 'clinical',
            'date': date_obj,
            'panel_id': 'OMIM-AUTO',
            'institute': institute or 'cust002',
            'version': version,
            'display_name': 'OMIM-AUTO',
            'genes': [],
        }

        alias_genes = self.genes_by_alias()

        omim_genes = get_omim_panel_genes(
            genemap2_lines=mim_files['genemap2'],
            mim2gene_lines=mim_files['mim2genes'],
            alias_genes=alias_genes,
        )
        panel_data['genes'].extend(omim_genes)

        panel_obj = build_panel(panel_data, self)

        if current_panel:
            new_genes = self.compare_mim_panels(current_panel, panel_obj)
            if not new_genes:
                LOG.info("The new version of omim does not differ from the old one")
                LOG.info("No update is added")
                return
            self.update_mim_version(new_genes, panel_obj, old_version=current_panel['version'])

        self.add_gene_panel(panel_obj)