# Step 1 -- hand the input PSL to fix_short_blocks together with a fresh
# temporary path (presumably the helper's output file; confirm in the helper).
print >> sys.stderr, "Fixing PSL..."
ft_name_1 = give_me_temp_filename(tmp_dir=options.tmp_dir)
fix_short_blocks(
    options.input_filename,
    ft_name_1,
    threshold=options.anchor,
    mismatches=options.mismatches)

# Step 2 -- sort the fixed PSL into a second temporary file.
print >> sys.stderr, "Sorting PSL..."
ft_name_2 = give_me_temp_filename(tmp_dir=options.tmp_dir)
sort_ttdb.sort_columns(
    ft_name_1,
    ft_name_2,
    # Sort keys, as annotated by the author: sequence name, matches,
    # mismatches, count blocks, target sequence.
    columns='10,1nd,2n,18n,13',
    header=False,
    ignore_case=False,
    tmp_dir=options.tmp_dir,
    parallel=cpus)
# The unsorted intermediate is not used again in this section.
delete_file(ft_name_1)

# Step 3 -- start processing the sorted PSL.
print >> sys.stderr, "Processing PSL..."
data, ties = [], []
# Disabled alternative condition:
#   or file(ft_name_2,'r').readline().find(';ge=')>-1
if options.same_gene:
    data = contigs_unique_within_same_gene_from(ft_name_2)
else:
    ties = ""
# Number of processes passed to the sort; a requested value of 0 is replaced
# by the machine's CPU count.
cpus = options.processes
if cpus == 0:
    cpus = multiprocessing.cpu_count()

# running
# NOTE: print("...") with a single string argument produces the same output
# under the Python 2 print statement and the Python 3 print function.
print("Fixing...")
ft_name_1 = give_me_temp_filename(tmp_dir=options.tmp_dir)
fix_short_blocks(
    options.input_filename,
    ft_name_1,
    threshold=options.anchor,
    mismatches=options.mismatches)

print("Sorting...")
ft_name_2 = give_me_temp_filename(tmp_dir=options.tmp_dir)
sort_ttdb.sort_columns(
    ft_name_1,
    ft_name_2,
    # Sort keys, as annotated by the author: sequence name, matches,
    # mismatches, count blocks.
    columns='10,1nd,2n,18n',
    header=False,
    ignore_case=False,
    tmp_dir=options.tmp_dir,
    parallel=cpus)
# The unsorted intermediate is not used again in this section.
delete_file(ft_name_1)

print("Processing...")
# Disabled alternative condition:
#   or file(ft_name_2,'r').readline().find(';ge=')>-1
if options.same_gene:
    data = contigs_unique_within_same_gene_from(ft_name_2)
else:
    data = contigs_unique_from(ft_name_2)

# Write the selected records to a new temporary file. The context manager
# closes (and therefore flushes) the file deterministically, so anything that
# reads ft_name_3 afterwards sees the complete contents instead of depending
# on garbage collection to close an anonymous handle.
ft_name_3 = give_me_temp_filename(tmp_dir=options.tmp_dir)
with open(ft_name_3, 'w') as ft_file_3:
    ft_file_3.writelines(data)

ft_name_4 = give_me_temp_filename(tmp_dir=options.tmp_dir)