# Script body: load one GMAP GFF file, report its scaffold/gene counts on
# stderr, filter it through gff_parser.filter_top2, and open the three output
# files used by the processing loop that starts at the bottom of this section.

# argv[1] is the GFF file to read; argv[2] is handed to gff_parser.filter_top2.
filename_gff = sys.argv[1]
filename_top2 = sys.argv[2]
# Output base name: the input path with every '_gff' and '.gff' substring removed.
filename_base = filename_gff.replace('_gff','').replace('.gff','')

# Containers keyed by dataset name; the only dataset here is filename_base.
total_gene_count = dict()
gff = dict()
data_list = [filename_base]

# Stop with exit status 1 when the input file is not readable.
if( not os.access(filename_gff, os.R_OK) ):
    sys.stderr.write('%s is not available.\n'%filename_gff)
    sys.exit(1)

# Parse the file; the number of top-level keys is what the log line below
# reports as "scaffolds".
gff[filename_base] = gff_parser.read_gmap_gff(filename_gff)
count_scaffolds = len(gff[filename_base].keys())
count_genes = gff_parser.count_genes(gff[filename_base])
total_gene_count[filename_base] = count_genes
sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n'%(filename_gff, filename_base, count_scaffolds, count_genes))

# Filtered copy of the parsed data, stored under the same dataset key.
# NOTE(review): the filtering rule lives in gff_parser.filter_top2, which is
# not visible here -- confirm what filename_top2 is expected to contain.
best_cov_gff = dict()
best_cov_gff[filename_base] = gff_parser.filter_top2(gff[filename_base],filename_top2)

# Output files, all named after filename_base. They are not closed anywhere in
# the visible part of the script.
f_rep = open('%s_rep.gff'%filename_base,'w')
f_multi = open('%s_multi.gff'%filename_base,'w')
f_log = open('%s_log.gff'%filename_base,'w')

# Start out empty; presumably filled by the loop below (its body is not
# visible from here).
gene2data = dict()
t_range_list = dict()
gff_new = dict()

# Walk every top-level key of the filtered data for each dataset.
# NOTE(review): the body of the inner loop lies beyond the end of this section.
for tmp_data in data_list:
    for tmp_t_id in best_cov_gff[tmp_data].keys():
# NOTE(review): the next line is the tail of a definition whose header lies
# above this section; it is reproduced exactly as found rather than re-indented
# by guesswork.
gene_list.append(tmp_gid) return len(set(gene_list))

# Script body: read a list file of (dataset name, GFF path) pairs, parse every
# readable GFF file, and report per-dataset scaffold/gene counts on stderr.

# Containers keyed by dataset name.
total_gene_count = dict()
gff = dict()
data_list = []

# filename_list is not defined in the visible code; it must be set earlier.
f_list = open(filename_list, 'r')
for line in f_list:
    # Each line must split into exactly two whitespace-separated fields; a
    # blank or otherwise malformed line raises ValueError on this unpacking.
    (tmp_dataname, tmp_filename) = line.strip().split()
    # Unreadable files are reported and skipped rather than aborting the run.
    if (not os.access(tmp_filename, os.R_OK)):
        sys.stderr.write('%s is not available.\n' % tmp_filename)
        continue
    gff[tmp_dataname] = gff_parser.read_gff(tmp_filename)
    data_list.append(tmp_dataname)
    total_gene_count[tmp_dataname] = gff_parser.count_genes(gff[tmp_dataname])
    # The "scaffolds" figure is the number of top-level entries in the parsed data.
    sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n' % (tmp_filename, tmp_dataname, len(
        gff[tmp_dataname]), total_gene_count[tmp_dataname]))
f_list.close()
# NOTE(review): this exit is unconditional, so every statement below it is
# unreachable and the script always ends with status 1 after printing the
# counts. It looks like a debugging leftover -- confirm before removing.
sys.exit(1)

# Nothing could be loaded: report it and stop with exit status 1.
if (len(data_list) == 0):
    sys.stderr.write('No input data. Exit.\n')
    sys.exit(1)

# Output files named after filename_base, which is not defined in the visible
# code. They are not closed anywhere in the visible part of the script.
f_rep = open('%s_rep.gff' % filename_base, 'w')
f_multi = open('%s_multi.gff' % filename_base, 'w')
f_log = open('%s_log.gff' % filename_base, 'w')

# Starts out empty; presumably filled by code beyond this section.
gene_tlen = dict()
# NOTE(review): the next line is the tail of a definition whose header lies
# above this section; it is reproduced exactly as found rather than re-indented
# by guesswork.
for tmp_gid in tmp_gff[tmp_tid].keys(): gene_list.append(tmp_gid) return len(set(gene_list))

# Script body: read a list file of (dataset name, GFF path) pairs, parse every
# readable GFF file, and report per-dataset scaffold/gene counts on stderr.

# Containers keyed by dataset name.
total_gene_count = dict()
gff = dict()
data_list = []

# filename_list is not defined in the visible code; it must be set earlier.
f_list = open(filename_list,'r')
for line in f_list:
    # Each line must split into exactly two whitespace-separated fields; a
    # blank or otherwise malformed line raises ValueError on this unpacking.
    (tmp_dataname, tmp_filename) = line.strip().split()
    # Unreadable files are reported and skipped rather than aborting the run.
    if( not os.access(tmp_filename, os.R_OK) ):
        sys.stderr.write('%s is not available.\n'%tmp_filename)
        continue
    gff[tmp_dataname] = gff_parser.read_gff(tmp_filename)
    data_list.append(tmp_dataname)
    total_gene_count[tmp_dataname] = gff_parser.count_genes(gff[tmp_dataname])
    # The "scaffolds" figure is the number of top-level entries in the parsed data.
    sys.stderr.write('%s -> %s: %d scaffolds, %d genes\n'%(tmp_filename, tmp_dataname,len(gff[tmp_dataname]), total_gene_count[tmp_dataname]))
f_list.close()
# NOTE(review): this exit is unconditional, so every statement below it is
# unreachable and the script always ends with status 1 after printing the
# counts. It looks like a debugging leftover -- confirm before removing.
sys.exit(1)

# Nothing could be loaded: report it and stop with exit status 1.
if( len(data_list) == 0 ):
    sys.stderr.write('No input data. Exit.\n')
    sys.exit(1)

# Output files named after filename_base, which is not defined in the visible
# code. They are not closed anywhere in the visible part of the script.
f_rep = open('%s_rep.gff'%filename_base,'w')
f_multi = open('%s_multi.gff'%filename_base,'w')
f_log = open('%s_log.gff'%filename_base,'w')

# Start out empty; presumably filled by the loop below (its body is not
# visible from here).
gene_tlen = dict()
gff_new = dict()

# Per-dataset processing loop.
# NOTE(review): the loop body lies beyond the end of this section.
for tmp_data in data_list: