def process_file(path, options):
    """Copy a tab-delimited file to its output path, one chromosome at a time.

    The output location is obtained from ``get_output_path(path, options)``.
    A header row (``chrom``, ``index``, ``forward``, ``reverse``) is written
    first; after that, every row yielded by ``ChromosomeManager`` for a
    chromosome is written with the chromosome name prepended.

    Args:
        path: Path of the tab-delimited input file.
        options: Options object; only passed through to ``get_output_path``.
    """
    logging.info('Processing file "%s"', path)
    output_path = get_output_path(path, options)
    # Both handles are context-managed so they are closed (and the output is
    # flushed) even if reading or writing fails part-way through.
    # NOTE(review): the 'U' open mode was removed in Python 3.11; switch to
    # plain 'r' if this module is meant to run on modern Python 3.
    with open(path, 'rU') as infile, open(output_path, 'wt') as outfile:
        reader = csv.reader(infile, delimiter='\t')
        writer = csv.writer(outfile, delimiter='\t')
        writer.writerow(['chrom', 'index', 'forward', 'reverse'])
        manager = ChromosomeManager(reader)
        while not manager.done:
            cname = manager.chromosome_name()
            logging.info('Processing chromosome %s', cname)
            for read in manager.load_chromosome():
                # Each row is a list; prefix it with the chromosome name.
                writer.writerow([cname] + read)
def process_file(fpath, options):
    """Process a tab-delimited file by fanning out one command per chromosome.

    The input is split into one temporary file per chromosome, a
    ``genetrack.py`` command line is built for each of those files and handed
    to a ``ProcessFileThread``, and once every thread has been joined the
    per-chromosome output files are concatenated, in chromosome order, into
    the final output file given by ``get_output_path(fpath, options)``.

    Args:
        fpath: Path of the tab-delimited input file.
        options: Options object. ``options.chromosome`` is reset to ``''``;
            ``sigma``, ``exclusion``, ``up_width``, ``down_width``,
            ``chunk_size``, ``filter`` and ``format`` are interpolated into
            the command line of each worker.
    """
    options.chromosome = ''
    genetrack_path = os.path.join(os.path.dirname(__file__), 'genetrack.py')
    inputs = []
    outputs = []
    threads = []
    temp_dir = tempfile.mkdtemp()
    try:
        # Split the input into one temporary file per chromosome.
        # NOTE(review): the 'U' open mode was removed in Python 3.11; switch
        # to plain 'r' if this module is meant to run on modern Python 3.
        with open(fpath, 'rU') as source:
            manager = ChromosomeManager(csv.reader(source, delimiter='\t'))
            logging.info('Preparing file "%s"', fpath)
            while not manager.done:
                name = manager.chromosome_name()
                logging.info('Preparing chromosome %s', name)
                data = manager.load_chromosome()
                input_name = os.path.join(temp_dir, name + '.txt')
                with open(input_name, 'wt') as chunk:
                    writer = csv.writer(chunk, delimiter='\t')
                    for read in data:
                        writer.writerow([name] + read)
                inputs.append(input_name)

        # Process each chromosome
        try:
            for input_name in inputs:
                # NOTE(review): the command is a single interpolated string;
                # a path containing whitespace would presumably be split into
                # several arguments -- confirm how ProcessFileThread runs it.
                command = (
                    'python %s %s -s %d -e %d -u %d -d %d -k %d -F %d -o %s' % (
                        genetrack_path, input_name, options.sigma,
                        options.exclusion, options.up_width,
                        options.down_width, options.chunk_size,
                        options.filter, options.format))
                worker = ProcessFileThread(command)
                worker.start()
                threads.append(worker)
                outputs.append(get_output_path(input_name, options))
        finally:
            # Wait for completion. Done in ``finally`` so that workers that
            # were already started are never left running while the
            # temporary directory is being removed below.
            for thread in threads:
                thread.join()

        # Merge together output files
        logging.info('Merging output files')
        options.chromosome = ''
        with open(get_output_path(fpath, options), 'wt') as real_output:
            for output in outputs:
                # Close each partial file before deleting it; unlinking a
                # file that is still open fails on Windows.
                with open(output, 'rt') as part:
                    shutil.copyfileobj(part, real_output)
                os.unlink(output)
    finally:
        # mkdtemp() leaves deletion to the caller, so clean up on every path.
        shutil.rmtree(temp_dir)