# Command line: <filename_gff> <filename_top2>.
# Print a usage line instead of an IndexError traceback when one is missing.
if len(sys.argv) < 3:
    sys.stderr.write('Usage: %s <filename_gff> <filename_top2>\n' % sys.argv[0])
    sys.exit(1)
filename_gff = sys.argv[1]
filename_top2 = sys.argv[2]

# Dataset name: the input path with every '_gff' and '.gff' substring removed.
# It keys the dictionaries below and prefixes the output file names.
filename_base = filename_gff.replace('_gff', '').replace('.gff', '')

# Stop early when the GFF file cannot be read.
if not os.access(filename_gff, os.R_OK):
    sys.stderr.write('%s is not available.\n' % filename_gff)
    sys.exit(1)

# Per-dataset containers keyed by dataset name; this script handles one dataset.
data_list = [filename_base]
total_gene_count = dict()
gff = dict()
gff[filename_base] = gff_parser.read_gmap_gff(filename_gff)

# The top-level keys of the parsed GFF are reported as scaffolds in the log
# line (presumably one entry per scaffold -- confirm against gff_parser).
count_scaffolds = len(gff[filename_base].keys())
count_genes = gff_parser.count_genes(gff[filename_base])
total_gene_count[filename_base] = count_genes
sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n'
                 % (filename_gff, filename_base, count_scaffolds, count_genes))

# Filtered view of the dataset produced by gff_parser.filter_top2 from the
# second command-line argument.
best_cov_gff = dict()
best_cov_gff[filename_base] = gff_parser.filter_top2(gff[filename_base], filename_top2)

# Three output files, all named after the dataset base name.
f_rep = open(filename_base + '_rep.gff', 'w')
f_multi = open(filename_base + '_multi.gff', 'w')
f_log = open(filename_base + '_log.gff', 'w')

# Accumulators filled by the processing loop that follows.
gene2data = {}
t_range_list = {}
gff_new = {}
for tmp_data in data_list:
    for tmp_t_id in best_cov_gff[tmp_data].keys():
# Example no. 2
# 0
            gene_list.append(tmp_gid)
    return len(set(gene_list))


# Per-dataset containers, keyed by the dataset name read from the list file.
total_gene_count = dict()
gff = dict()
data_list = []

# Each non-blank line of the list file is "<dataset name> <GFF path>".
# The context manager closes the file even if a line fails to parse.
with open(filename_list, 'r') as f_list:
    for line in f_list:
        if not line.strip():
            # Blank lines (typically a trailing newline) name no dataset.
            continue
        # Any other line must hold exactly two whitespace-separated fields;
        # otherwise this unpacking raises ValueError.
        (tmp_dataname, tmp_filename) = line.split()
        if not os.access(tmp_filename, os.R_OK):
            # Unreadable inputs are reported and skipped, not fatal.
            sys.stderr.write('%s is not available.\n' % tmp_filename)
            continue
        gff[tmp_dataname] = gff_parser.read_gff(tmp_filename)
        data_list.append(tmp_dataname)
        total_gene_count[tmp_dataname] = gff_parser.count_genes(gff[tmp_dataname])
        sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n' %
                         (tmp_filename, tmp_dataname,
                          len(gff[tmp_dataname]),
                          total_gene_count[tmp_dataname]))

# Stop when no dataset was registered in data_list.
if not data_list:
    sys.stderr.write('No input data. Exit.\n')
    sys.exit(1)

# Three output files sharing the prefix held in filename_base.
f_rep = open('{0}_rep.gff'.format(filename_base), 'w')
f_multi = open('{0}_multi.gff'.format(filename_base), 'w')
f_log = open('{0}_log.gff'.format(filename_base), 'w')

# Accumulator filled by the code that follows.
gene_tlen = {}
        for tmp_gid in tmp_gff[tmp_tid].keys():
            gene_list.append(tmp_gid)
    return len(set(gene_list))

# Per-dataset containers, keyed by the dataset name read from the list file.
total_gene_count = dict()
gff = dict()
data_list = []

# Each non-blank line of the list file is "<dataset name> <GFF path>".
# The context manager closes the file even if a line fails to parse.
with open(filename_list,'r') as f_list:
    for line in f_list:
        if not line.strip():
            # Blank lines (typically a trailing newline) name no dataset.
            continue
        # Any other line must hold exactly two whitespace-separated fields;
        # otherwise this unpacking raises ValueError.
        (tmp_dataname, tmp_filename) = line.split()
        if not os.access(tmp_filename, os.R_OK):
            # Unreadable inputs are reported and skipped, not fatal.
            sys.stderr.write('%s is not available.\n'%tmp_filename)
            continue
        gff[tmp_dataname] = gff_parser.read_gff(tmp_filename)
        data_list.append(tmp_dataname)
        total_gene_count[tmp_dataname] = gff_parser.count_genes(gff[tmp_dataname])
        sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n'%(tmp_filename, tmp_dataname,len(gff[tmp_dataname]), total_gene_count[tmp_dataname]))

# An empty data_list means there is nothing to process: report and stop.
if not data_list:
    sys.stderr.write('No input data. Exit.\n')
    sys.exit(1)

# Three output files sharing the prefix held in filename_base.
f_rep = open('{0}_rep.gff'.format(filename_base), 'w')
f_multi = open('{0}_multi.gff'.format(filename_base), 'w')
f_log = open('{0}_log.gff'.format(filename_base), 'w')

# Accumulators filled by the per-dataset loop that follows.
gene_tlen = {}
gff_new = {}
for tmp_data in data_list: