Example #1
0
def main():
    """Prepare the output directory, read the species list, and dispatch the work.

    Creates the ``raw_tables`` directory if it does not exist, obtains
    ``all_species`` and ``ensembl_db_name`` from ``get_species``, closes the
    database handles, and then passes ``store_exon_seqs_species`` together
    with the species list to ``parallelize``.
    """
    worker_count = 8

    outdir = "raw_tables"
    os.makedirs(outdir, exist_ok=True)

    connection = connect_to_mysql(Config.mysql_conf_file)
    cur = connection.cursor()
    all_species, ensembl_db_name = get_species(cur)

    # Disabled debugging switches -- uncomment to restrict the species list:
    # unprocessed_species = check_species_done(cur, all_species, ensembl_db_name, outdir)
    # print("unprocessed_species:", len(unprocessed_species))
    # print("\n".join(unprocessed_species))
    # exit()
    #
    # all_species = unprocessed_species
    # all_species = ['monodelphis_domestica']
    # all_species = sample(all_species, 1)

    # the handles opened here are not passed on, so release them before dispatch
    cur.close()
    connection.close()

    extra_args = [ensembl_db_name, outdir]
    parallelize(worker_count, store_exon_seqs_species, all_species, extra_args)
def main():
    
    special    = 'test'
    no_threads = 10
    method     = 'usearch'


    if len(sys.argv) > 1 and  len(sys.argv)<4:
        print "usage: %s <set name> <number of threads> <method>" % sys.argv[0]
        exit(1)  # exit if bad number of cmdline params

    elif len(sys.argv)==4:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_threads = int(sys.argv[2])
        
        method = sys.argv[3]
        if not (method =='usearch' or method=='sw_sharp'):
            print "unrecognized method: ", method
            exit(1) # exit if unrecognized search method

    # sw_sharps chokes if there is only one graphics card
    if method=='sw_sharp': no_threads = 1

    db  = connect_to_mysql()
    cfg = ConfigurationReader()
    cursor = db.cursor()

    [all_species, ensembl_db_name] = get_species (cursor)


    print '======================================='
    print sys.argv[0]
    if special:
        print "using", special, "set"
        if special == 'complement':
            gene_list = get_complement_ids(cursor, ensembl_db_name, cfg)
        else:
            gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
    else:
        print "using all protein coding genes"
        switch_to_db (cursor,  ensembl_db_name['homo_sapiens'])
        gene_list = get_gene_ids (cursor, biotype='protein_coding', is_known=1)

    cursor.close()
    db.close()

    parallelize (no_threads, find_missing_exons, gene_list, [local_db, ensembl_db_name, method])
    
    return True
Example #3
0
def main():
    """Read the species list and hand it to parallelize with collect_paralogues.

    Returns True after parallelize returns.
    """
    worker_count = 10

    connection = connect_to_mysql()
    cur = connection.cursor()
    species_names, ensembl_db_name = get_species(cur)
    # the handles opened here are not passed on, so release them before dispatch
    cur.close()
    connection.close()

    # NOTE(review): local_db is not bound in this function -- presumably a
    # module-level name; confirm.
    extra_args = [local_db, ensembl_db_name]
    parallelize(worker_count, collect_paralogues, species_names, extra_args)

    return True
def main():
    """Read the species list and hand it to parallelize with make_alignments.

    Returns True after parallelize returns.
    """
    worker_count = 1

    connection = connect_to_mysql()
    cur = connection.cursor()
    species_names, ensembl_db_name = get_species(cur)

    # the handles opened here are not passed on, so release them before dispatch
    cur.close()
    connection.close()

    # NOTE(review): local_db is not bound in this function -- presumably a
    # module-level name; confirm.
    extra_args = [local_db, ensembl_db_name]
    parallelize(worker_count, make_alignments, species_names, extra_args)

    return True
def main():
    
    no_threads = 12
    special    = None


    if len(sys.argv) > 1 and  len(sys.argv)<3:
        print "usage: %s <set name> <no of processes>" % sys.argv[0]
        exit(1) # after usage statement
    elif len(sys.argv)>=3:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_processes = int(sys.argv[2])

    db  = connect_to_mysql()
    cfg = ConfigurationReader()

    cursor = db.cursor()

    [all_species, ensembl_db_name] = get_species (cursor)

    print "running ", sys.argv[0]

    if special:
        print "using", special, "set"
        if special == 'complement':
            gene_list = get_complement_ids(cursor, ensembl_db_name, cfg)
        else:
            gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )

    else:
        print "using all protein coding genes"
        switch_to_db (cursor,  ensembl_db_name['homo_sapiens'])
        gene_list = get_gene_ids (cursor, biotype='protein_coding', is_known=1)
 

    cursor.close()
    db.close()

    parallelize (no_processes, multiple_exon_alnmt, gene_list, [local_db, ensembl_db_name])
    
    return True
def main():

    no_threads = 1
    special    = ''
    db     = connect_to_mysql()
    cfg    = ConfigurationReader()

    cursor = db.cursor()
    [all_species, ensembl_db_name] = get_species (cursor)

    species = ''
    if len(sys.argv) > 1 and  len(sys.argv)<3  or len(sys.argv) >= 2 and sys.argv[1]=="-h":
        print "usage: %s <set name/species> <number of processes>" % sys.argv[0]
        exit(1) # after usage statement
    elif len(sys.argv)==3:
        special = sys.argv[1].lower()
        if special == 'none': 
            special = None
        elif special in all_species:
            species = special
        no_threads = int(sys.argv[2])
        
    print '======================================='
    print sys.argv[0]
    if species:
        print species, "only"
        switch_to_db (cursor, ensembl_db_name[species])
        if (species=='homo_sapiens'):
            gene_ids = get_gene_ids (cursor, biotype='protein_coding', is_known=1, ref_only=True)
        else:
            gene_ids = get_gene_ids (cursor, biotype='protein_coding')
        parallelize_args = [no_threads, one_species_all_genes_loop, gene_ids,  [local_db, ensembl_db_name, species]]
    elif special:
        print "using", special, "set"
        gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
        parallelize_args = [no_threads, ortologues_for_given_genes_loop, gene_list,  [local_db, ensembl_db_name]]
    else:
        parallelize_args = [no_threads, all_species_all_genes_loop, all_species, [local_db, ensembl_db_name]]
        
    cursor.close()
    db    .close()

    parallelize (*parallelize_args)
def main():

    no_threads = 1
    special = 'test'

    if len(sys.argv) > 1 and len(sys.argv) < 3:
        print "usage: %s <set name> <number of threads>" % sys.argv[0]
        exit(1)
    elif len(sys.argv) == 3:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_threads = int(sys.argv[2])

    db = connect_to_mysql()
    cfg = ConfigurationReader()
    cursor = db.cursor()

    [all_species, ensembl_db_name] = get_species(cursor)

    print '======================================='
    print sys.argv[0]
    if special:
        print "using", special, "set"
        if special == 'complement':
            gene_list = get_complement_ids(cursor, ensembl_db_name, cfg)
        else:
            gene_list = get_theme_ids(cursor, ensembl_db_name, cfg, special)
    else:
        print "using all protein coding genes"
        switch_to_db(cursor, ensembl_db_name['homo_sapiens'])
        gene_list = get_gene_ids(cursor, biotype='protein_coding', is_known=1)

    cursor.close()
    db.close()

    parallelize(no_threads, exon_cleanup, gene_list,
                [local_db, ensembl_db_name])

    return True
def main():
    
    no_threads = 1
    special    = 'one'

    if len(sys.argv) > 1 and  len(sys.argv)<3:
        print "usage: %s <set name> <number of threads> <method>"
        exit(1)
    elif len(sys.argv)==3:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_threads = int(sys.argv[2])

    db  = connect_to_mysql()
    cfg = ConfigurationReader()
    cursor = db.cursor()

    [all_species, ensembl_db_name] = get_species (cursor)

    print '======================================='
    print sys.argv[0]
    if special:
        print "using", special, "set"
        if special == 'complement':
            gene_list = get_complement_ids(cursor, ensembl_db_name, cfg)
        else:
            gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
    else:
        print "using all protein coding genes that have an sw# or usearch patch"
        switch_to_db (cursor,  ensembl_db_name['homo_sapiens'])
        gene_list = human_genes_w_novel_exon_orthologues (cursor, ensembl_db_name)

    cursor.close()
    db.close()

    parallelize (no_threads, maps_for_gene_list, gene_list[0:15000], [local_db, ensembl_db_name])
    
    return True
def main():
    
    no_threads = 10
    special    = None

    if len(sys.argv) > 1 and  len(sys.argv)<3:
        print "usage: %s <set name> <number of threads> " % sys.argv[0]
        exit(1)
    elif len(sys.argv)==3:

        special = sys.argv[1]
        special = special.lower()
        if special == 'none': special = None

        no_threads = int(sys.argv[2])

    db  = connect_to_mysql()
    cfg = ConfigurationReader()
    cursor = db.cursor()

    [all_species, ensembl_db_name] = get_species (cursor)
    
    print '======================================='
    print sys.argv[0]
    if special:
        print "using", special, "set"
        gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
    else:
        print "using all protein coding genes"
        switch_to_db (cursor,  ensembl_db_name['homo_sapiens'])
        gene_list = get_gene_ids (cursor, biotype='protein_coding', is_known=1)
        

    cursor.close()
    db.close()

    parallelize (no_threads, maps_for_gene_list, gene_list, [local_db, ensembl_db_name])
    
    return True
Example #10
0
def main():
    """Run multiple_alignment_genes for a hard-coded gene list.

    Reads ``ensembl_db_name`` through ``get_species``, closes the database
    handles, and passes the gene list to ``parallelize`` together with the
    representative species name.

    Returns True after parallelize returns.
    """
    # TODO: split codons
    worker_count = 1
    rep_species = 'monodelphis_domestica'

    connection = connect_to_mysql(Config.mysql_conf_file)
    cursor = connection.cursor()

    # only the database-name lookup is needed here; the species list is unused
    _, ensembl_db_name = get_species(cursor)

    # Disabled alternative: sample ten genes that have exon maps.
    # switch_to_db(cursor,  ensembl_db_name[rep_species])
    # qry = "select distinct(g.gene_id) from exon_map e left join gene2exon g on e.exon_id = g.exon_id"
    # genes_with_maps = [ret[0] for ret in hard_landing_search(cursor, qry)]
    # print(f"genes with maps: {len(genes_with_maps)}")
    # gene_list = sample(genes_with_maps, 10)

    # NARS; only three exons out of 14, and not the same species as the ones
    # for which I have the full sequence
    gene_list = [8979]

    cursor.close()
    connection.close()

    extra_args = [rep_species, ensembl_db_name]
    parallelize(worker_count, multiple_alignment_genes, gene_list, extra_args)

    return True
def main():

    """
    Main entry point, but in reality does nothing except taking care of the parallelization.
    The parallelization here is per-species.
    """

    no_threads = 1
    special    = ''

    if len(sys.argv) > 1 and  len(sys.argv)<3  or len(sys.argv) >= 2 and sys.argv[1]=="-h":
        print "usage: %s <set name> <number of threads>" % sys.argv[0]
        exit(1) # after usage statment
    elif len(sys.argv)==3:
        special = sys.argv[1].lower()
        if special == 'none': special = None
        no_threads = int(sys.argv[2])

    db     = connect_to_mysql()
    cfg    = ConfigurationReader()
    cursor = db.cursor()
    [all_species, ensembl_db_name] = get_species (cursor)
    print '======================================='
    print sys.argv[0]
    if special:
        print "using", special, "set"
        gene_list = get_theme_ids (cursor,  ensembl_db_name, cfg, special )
 
    cursor.close()
    db    .close()

    # two version of the main loop:
    # 1) over all species, and all genes in each speceis
    if not special:
        parallelize (no_threads, store_exon_seqs, all_species, [local_db, ensembl_db_name])
    else:
        parallelize (no_threads, store_exon_seqs_special, gene_list,  [local_db, ensembl_db_name])