def combine_asr_tables(output_files, verbose=False):
    """Combine all tables coming from asr output.

    Cuts the 2nd column out of each file and joins them together into a
    single table. Assumes all output files have the same row identifiers
    and that these are in the same order.
    """
    # Going to store an array of arrays here
    combined_table = []

    # load in the first column (containing row ids). File doesn't matter
    # since they should all have identical first columns.
    table = LoadTable(filename=output_files[0], header=True, sep='\t')
    row_ids = table.getRawData(columns=[table.Header[0]])
    combined_table.append([table.Header[0]])
    for row_id in row_ids:
        combined_table.append([row_id])

    # Now add the rest of the files to the table
    for i, output_file in enumerate(output_files):
        if verbose:
            print "Combining file {0} of {1}: {2}".format(i + 1,
                    len(output_files), output_file)

        # pull out the second column (first column with actual predictions)
        table = LoadTable(filename=output_file, header=True, sep='\t')
        predictions = table.getRawData(columns=[table.Header[1]])

        # Add the header for our column to the list of headers
        combined_table[0].append(table.Header[1])

        # Add the rest of the values in the column
        j = 1
        for prediction in predictions:
            combined_table[j].append(prediction)
            j += 1

    return combined_table
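# A minimal usage sketch for the combiner above. The file names are
# hypothetical; each file is assumed to be a tab-delimited ASR output whose
# first column holds identical, identically ordered row ids.
def _example_combine_asr_tables():
    output_files = ['asr_trait_A.tab', 'asr_trait_B.tab']  # hypothetical
    combined = combine_asr_tables(output_files, verbose=True)
    # combined[0] is the header row; each following row is
    # [row_id, prediction_from_A, prediction_from_B]
    for row in combined[:3]:
        print row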
def ace_for_picrust(tree_path, trait_table_path, method='pic', HALT_EXEC=False):
    '''Runs the Ace application controller given path of tree and trait table
    and returns a Table'''
    # initialize Ace app controller
    ace = Ace(HALT_EXEC=HALT_EXEC)

    tmp_output_count_path = get_tmp_filename()
    tmp_output_prob_path = get_tmp_filename()

    # quote file names
    tree_path = '"{0}"'.format(tree_path)
    trait_table_path = '"{0}"'.format(trait_table_path)

    as_string = " ".join([tree_path, trait_table_path, method,
                          tmp_output_count_path, tmp_output_prob_path])

    # Run ace here
    result = ace(data=as_string)

    # Load the output into Table objects
    try:
        asr_table = LoadTable(filename=tmp_output_count_path, header=True,
                              sep='\t')
    except IOError:
        raise RuntimeError, \
            ("R reported an error on stderr:"
             " %s" % "\n".join(result["StdErr"].readlines()))

    asr_prob_table = LoadTable(filename=tmp_output_prob_path, header=True,
                               sep='\t')

    # Remove tmp files
    remove(tmp_output_count_path)
    remove(tmp_output_prob_path)

    return asr_table, asr_prob_table
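# Usage sketch for the Ace wrapper; the input paths are hypothetical. 'pic'
# (phylogenetically independent contrasts) is the default method above, and
# the two returned Tables hold the count and probability outputs.
def _example_ace_for_picrust():
    counts, probs = ace_for_picrust('reference_tree.newick',
                                    'trait_table.tab', method='pic')
    print counts.Header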
def make_bed_entries(mapped_read_path, chrom_number, feature_name,
        output_bed_file, max_read_length=None, count_max_length=False,
        strand=NULL_STRAND, sep='\t', is_sorted=True, ui=None):
    """translates a table of mapped chromosome positions into BED-6 entries

    Arguments:
        - mapped_read_path: path to table containing read coordinates and
          frequency data
        - output_bed_file: open, writable file for the BED-6 format output
        - max_read_length: maximum length of a read
        - count_max_length: if max_read_length provided, all mapped seqs set
          to this length
        - strand: only reads from the specified strand are added; default is
          both (NOTE: not currently applied as a filter below)
        - sep: the delimiter in the read coordinates file
        - is_sorted: whether the read file is already sorted
    """
    data = LoadTable(mapped_read_path, sep=sep)
    assert list(data.Header) == ['start', 'length', 'strand', 'freq'], \
        "mapped read Table header doesn't match expected"

    if not is_sorted:
        data = data.sorted(columns='start')

    if count_max_length:
        assert max_read_length, 'must specify max_read_length to use' \
            ' count_max_length'

    data = data.array.astype(int)
    total_data = data.shape[0]
    chrom = "chr%s" % (chrom_number)
    score = 30
    name = feature_name
    for i, row in enumerate(data):
        if ui is not None and i % 10 == 0:
            ui.display('Converting mapped locations [%d / %d]' % (i, total_data),
                       i / float(total_data))

        start = row[0]  # move from 1-based BWA calls to 0-based BED
        end = start + row[1]  # end is 1 beyond actual mapped end, so length = end - start
        read_strand = ['+', '-'][(row[2] > 0) == 0]  # 1,-1 converted to '+','-'
        for _ in range(row[3]):
            bed_string = '%s\t%s\t%s\t%s\t%s\t%s\n' % \
                (chrom, start, end, name, score, read_strand)
            output_bed_file.write(bed_string)

    print "Converted %d mapped locations" % (total_data)
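# Usage sketch: convert a (hypothetical) read-coordinate table for
# chromosome 1 into BED-6 entries; note the output argument is an open,
# writable file object, not a path.
def _example_make_bed_entries():
    out = open('chr1_reads.bed', 'w')
    make_bed_entries('chr1_read_coords.txt', 1, 'my_feature', out)
    out.close()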
def test_export_table(self):
    """correctly generates table file"""
    orig_data = dict(counts=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]],
                     ranks=[0, 1, 2, 3, 4],
                     labels=['a', 'b', 'c', 'd', 'e'])
    coll = RegionCollection(**orig_data)
    expect = coll.toTable().getRawData()
    coll.writeToFile('testdata', as_table=True)

    got = LoadTable('testdata', sep='\t')
    self.assertEqual(got.getRawData(), expect)
    remove_files(['testdata'], error_on_missing=False)
def toTable(self):
    """builds a tab separated table for writeToFile"""
    header = []
    if self.labels is not None:
        labels = self.labels.tolist()
        header.append('gene')

    if self.ranks is not None:
        ranks = self.ranks.tolist()
        header.append('rank')

    save_data = self.counts.tolist()
    n_cols = len(save_data[0])
    window = n_cols / 2
    posn = range(-window, window)
    header.extend(map(str, posn))

    for i in range(len(save_data)):
        if self.ranks is not None:
            save_data[i].insert(0, ranks[i])
        if self.labels is not None:
            save_data[i].insert(0, labels[i])

    out_table = LoadTable(header=header, rows=save_data, sep='\t')
    return out_table
def Pseudovalues(self):
    """Return a table of the pseudovalues"""
    # if the statistics haven't been run yet.
    if self._pseudovalues is None:
        self.jackknife()

    # detailed table
    title = 'Pseudovalues'
    rows = []
    for index in range(self.n):
        row = [index]
        pseudovalues = self._pseudovalues[index]
        try:
            for value in pseudovalues:
                row.append(value)
        except TypeError:
            row.append(pseudovalues)
        rows.append(row)

    header = ['i']
    pseudovalues = self._pseudovalues[0]
    try:
        num_datasets = len(pseudovalues)
        for i in range(num_datasets):
            header.append('Pseudovalue_%s-i' % i)
    except TypeError:
        header.append('Pseudovalue-i')

    return LoadTable(rows=rows, header=header, title=title)
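# For reference, the pseudovalues tabulated above follow the standard
# jackknife definition p_i = n*stat(all) - (n-1)*stat(all but i). A
# self-contained numpy illustration (not part of the original class):
def _example_jackknife_pseudovalues():
    import numpy
    data = numpy.array([2.0, 4.0, 6.0, 8.0])
    n = len(data)
    full_stat = data.mean()
    subset_stats = numpy.array([numpy.delete(data, i).mean()
                                for i in range(n)])
    pseudovalues = n * full_stat - (n - 1) * subset_stats
    print pseudovalues  # for the mean, pseudovalues equal the data points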
def SubSampleStats(self):
    """Return a table of the sub-sample statistics"""
    # if the statistics haven't been run yet.
    if self._subset_statistics is None:
        self.jackknife()

    # generate table
    title = 'Subsample Stats'
    rows = []
    for index in range(self.n):
        row = []
        row.append(index)
        subset_statistics = self._subset_statistics[index]
        try:
            for value in subset_statistics:
                row.append(value)
        except TypeError:
            row.append(subset_statistics)
        rows.append(row)

    header = ['i']
    subset_stats = self._subset_statistics[0]
    try:
        num_datasets = len(subset_stats)
        for i in range(num_datasets):
            header.append('Stat_%s-i' % i)
    except TypeError:
        header.append('Stat-i')

    return LoadTable(rows=rows, header=header, title=title)
def main():
    rr = RunRecord('add_expression_db')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(window_title='Add Expression to DB')

    session = db_query.make_session(args.db_path)

    name = args.name
    description = args.description
    ref_file = args.expression_data
    sample_type = args.sample_type

    # Check that Sample and Reference File are both unique
    if name in db_query.get_sample_entries(session):
        rr.dieOnCritical('Sample name already exists', name)

    if ref_file in db_query.get_reffile_entries(session, reffile_name=ref_file):
        rr.dieOnCritical('ReferenceFile already loaded', ref_file)

    if sample_types[sample_type] == sample_types['abs_expr']:
        expr_table = gene_expr_to_table(args.expression_data,
                stable_id_label=args.gene_id_heading,
                probeset_label=args.probeset_heading,
                exp_label=args.expression_heading,
                allow_probeset_many_gene=args.allow_probeset_many_gene,
                validate=True, sep=args.sep)
    elif sample_types[sample_type] == sample_types['diff_expr']:
        # validation breaks with some of Rohan's diff files:
        # he's included all probesets but only the mean score, once.
        expr_table = gene_expr_diff_to_table(args.expression_data,
                stable_id_label=args.gene_id_heading,
                probeset_label=args.probeset_heading,
                exp_label=args.expression_heading,
                sig_label=args.significance_heading,
                pval_label=args.p_value_heading,
                allow_probeset_many_gene=args.allow_probeset_many_gene,
                validate=False, sep=args.sep)
    elif sample_types[sample_type] == sample_types['target_genes']:
        expr_table = LoadTable(args.expression_data, sep=args.sep)
    else:
        rr.dieOnCritical('Unknown sample type', args.sample_type)

    success = add_data(session, name, description, args.expression_data,
            expr_table, sample_type=args.sample_type,
            reffile1=args.reffile1, reffile2=args.reffile2)

    rr.addInfo(name + ' added to DB', success)
    rr.display()
def PslToTable(data):
    """converts psl format to a table"""
    parser = MinimalPslParser(data)
    version = parser.next()
    header = parser.next()
    rows = [row for row in parser]
    table = LoadTable(header=header, rows=rows, title=version)
    return table
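# Usage sketch; the path is hypothetical, and data is assumed to be an open
# file (or lines) of BLAT .psl output, as accepted by MinimalPslParser. The
# psl version line becomes the table title.
def _example_psl_to_table():
    infile = open('blat_alignments.psl')
    table = PslToTable(infile)
    print table.Title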
def _load(self, filename):
    """loads attributes from a gzipped .npy data structure or a tab
    delimited cogent table"""
    try:
        infile = gzip.GzipFile(filename, 'r')
        data = numpy.load(infile)
        infile.close()

        # numpy.load() returns an array object here, but
        # numpy.load().tolist() on it returns a dict
        data = data.tolist()
        for name in data:
            value = data[name]
            self.__dict__[name] = value
            if (name == 'ranks' or name == 'counts') and value is not None:
                self.__dict__[name] = value.astype(float)
            if name == 'labels' and value is not None:
                self.__dict__[name] = value.astype(str)
    except Exception:
        print "Trying to load from table"
        data = LoadTable(filename, sep='\t')
        # convert the table to a collection here
        ls = []
        rs = []
        cs = []
        for row in data.getRawData():
            l = numpy.unicode(row[0])
            r = numpy.float(row[1])
            c = numpy.array(row[2:len(row)], dtype=numpy.float32)
            ls.append(l)
            rs.append(r)
            cs.append(c)
        self.labels = numpy.array(ls)
        self.ranks = numpy.array(rs)
        self.counts = numpy.array(cs)
        self.N = self.counts.shape[0]
def update_trait_dict_from_file(table_file, header=None, input_sep="\t"):
    """Load a trait dictionary from a table file

    table_file -- file name of a trait table. The first line should be a
    header line, with column headers equal to trait (e.g. gene family)
    names, while the row headers should be organism ids that match the tree.

    header -- if provided, the returned columns are restricted to (and
    ordered by) these trait names.
    """
    # First line should be headers
    table = LoadTable(filename=table_file, header=True, sep=input_sep)

    # do some extra stuff to match columns if a header is provided
    if header:
        # error checking to make sure traits in the ASR table are a subset
        # of traits in the genome table
        if set(header) != set(table.Header[1:]):
            if set(header).issubset(set(table.Header[1:])):
                diff_traits = set(table.Header[1:]).difference(set(header))
                warn("Missing traits in given ASR table with labels: {0}. "
                     "Predictions will not be produced for these traits."
                     .format(list(diff_traits)))
            else:
                raise RuntimeError(
                    "Given ASR trait table contains one or more traits that "
                    "do not exist in given genome trait table. Predictions "
                    "can not be made.")

        # Note: keep the first column heading (the name for the row ids) at
        # the beginning, not sorted
        sorted_header = [table.Header[0]]
        sorted_header.extend(header)
        table = table.getColumns(sorted_header)

    traits = {}
    for fields in table:
        try:
            traits[fields[0]] = map(float, fields[1:])
        except ValueError:
            err_str = \
                "Could not convert trait table fields:'%s' to float" % (fields[1:])
            raise ValueError(err_str)

    return table.Header[1:], traits
def BowtieToTable(data, row_converter=row_converter):
    """Converts bowtie output to a table

    Arguments:
        - row_converter: if not provided, uses a default converter which
          casts the Offset and Other Matches fields to ints. If set to None,
          all returned data will be strings (this is faster).
    """
    parser = BowtieOutputParser(data, row_converter=row_converter)
    header = parser.next()
    rows = [row for row in parser]
    table = LoadTable(header=header, rows=rows)
    return table
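# Usage sketch; the path is hypothetical, and data is assumed to be an open
# file of bowtie map output. Pass row_converter=None to keep every field as
# a string.
def _example_bowtie_to_table():
    infile = open('bowtie_output.map')
    table = BowtieToTable(infile)
    print table.Header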
def getMessageTable(self, last_n_lines=None, include_date=False):
    """Read the ChipPy.log file and return it as a table, returning only
    the last n lines if passed an int."""
    log_file = open(self.log_path)
    records = []
    for line in log_file:
        line = line.strip()
        if len(line) > 0:
            if include_date:
                records.append(line.split('\t')[0:])
            else:
                records.append(line.split('\t')[1:])  # don't display date
    log_file.close()

    if records == []:
        return None

    if include_date:
        header = ['Date/time', 'code_block', 'level', 'message', 'value']
    else:
        header = ['code_block', 'level', 'message', 'value']

    if type(last_n_lines) is int:
        # return only the last n lines of the log file
        try:
            table = LoadTable(header=header, rows=records[-last_n_lines:],
                              sep='\t')
        except IndexError:
            table = None
    else:
        try:
            table = LoadTable(header=header, rows=records, sep='\t')
        except IndexError:
            table = None

    return table
def SummaryStats(self):
    """Return a summary table with the statistic value(s) calculated for
    the full data-set, the jackknife statistics and standard errors."""
    # if the statistics haven't been run yet.
    if self._jackknifed_stat is None:
        self.jackknife()

    header = ['Sample Stat', 'Jackknife Stat', 'Standard Error']
    title = 'Summary Statistics'
    rows = np.vstack((self._sample_statistic, self._jackknifed_stat,
                      self._standard_error))
    rows = rows.transpose()
    return LoadTable(header=header, rows=rows, title=title)
def tabulate(d, transpose=False, key_fun=None):
    """
    d is a dictionary, keyed by tuple(A, B).
    Goal is to put A in rows, B in columns, report data in table form.

    >>> d = {(1,'a'):3, (1,'b'):4, (2,'a'):5, (2,'b'):0}
    >>> print tabulate(d)
    ===========
    o    a    b
    -----------
    1    3    4
    2    5    0
    -----------
    >>> print tabulate(d, transpose=True)
    ===========
    o    1    2
    -----------
    a    3    5
    b    4    0
    -----------
    """
    from cogent import LoadTable

    pairs = d.keys()
    rows, cols = zip(*pairs)
    if transpose:
        rows, cols = cols, rows

    rows = sorted(set(rows))
    cols = sorted(set(cols))
    header = ["o"] + list(cols)
    table = []
    for r in rows:
        combo = [(r, c) for c in cols]
        if transpose:
            combo = [(c, r) for (r, c) in combo]
        data = [d[x] for x in combo]
        data = ["{0:.1f}".format(x) if isinstance(x, float) else x \
                for x in data]
        if key_fun:
            data = [key_fun(x) for x in data]
        table.append([str(r)] + data)
    table = LoadTable(header=header, rows=table)

    return table
def _get_count_sum_table_per_chrom(counts, genes, upstream_size):
    """returns table of total counts for upstream, exon, intron coords"""
    rows = []
    header = ['region_type', 'ensembl_id', 'region_rank', 'counts', 'size']
    for gene in genes:
        # if no intron, we discard
        if len(gene.IntronCoords) == 0:
            continue

        rows += _get_exon_counts(gene, counts)
        rows += _get_intron_counts(gene, counts)
        rows += _get_upstream_counts(gene, counts, upstream_size)

    table = LoadTable(header=header, rows=rows)
    return table
def calcGStatistic(self, likelihoods, return_table=False):
    # A goodness-of-fit statistic
    (self, likelihoods) = self.parallelReconstructColumns(likelihoods)

    unambig = (self.ambig == 1.0).nonzero()[0]
    observed = self.counts[unambig].astype(int)
    expected = likelihoods[unambig] * observed.sum()
    # chisq = ((observed-expected)**2 / expected).sum()
    G = 2 * observed.dot(numpy.log(observed / expected))

    if return_table:
        motifs = self.getSitePatterns(unambig)
        rows = list(zip(motifs, observed, expected))
        rows.sort(key=lambda row: (-row[1], row[0]))
        table = LoadTable(header=['Pattern', 'Observed', 'Expected'],
                          rows=rows, row_ids=True)
        return (G, table)
    else:
        return G
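# The statistic computed above is the likelihood-ratio goodness-of-fit
# G = 2 * sum(O_i * ln(O_i / E_i)). A standalone numeric check with made-up
# counts (not data from the original code):
def _example_g_statistic():
    import numpy
    observed = numpy.array([12, 8, 5])
    expected = numpy.array([10.0, 10.0, 5.0])  # same total as observed
    G = 2 * observed.dot(numpy.log(observed / expected))
    print G  # ~0.81 for these numbers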
def wagner_for_picrust(tree_path, trait_table_path, gain=None,
        max_paralogs=None, HALT_EXEC=False):
    '''Runs the Count application controller given path of tree and trait
    table and returns a Table'''
    # initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    # set the parameters
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    # Have to manipulate the trait table some. Need to transpose it and
    # strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    # get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])

    # remove single quotes from the ids if they exist
    genome_ids = [str(id).strip('\'') for id in genome_ids]

    # transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])

    # change the headers
    table = table.withNewHeader(table.Header[1:], genome_ids)

    # write the modified table to a tmp file
    tmp_table_path = get_tmp_filename()
    table.writeToFile(tmp_table_path, sep='\t')

    # Run Count here
    result = count(data=(tree_path, tmp_table_path))

    # Remove tmp file
    remove(tmp_table_path)

    #tree = LoadTree(tree_path)
    tree = DndParser(open(tree_path))

    # parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(result["StdOut"].readlines(),
            remove_num_tips=len(tree.tips()))

    # transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
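# Usage sketch with hypothetical paths; gain=1 is just an example value
# passed through to Count's -gain parameter.
def _example_wagner_for_picrust():
    asr_table = wagner_for_picrust('reference_tree.newick',
                                   'trait_table.tab', gain=1)
    print asr_table.Header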
def _get_stats(self, stat, transform=None, **kwargs):
    """returns a table for the indicated statistics"""
    if self._dists is None:
        return None

    rows = []
    for row_name in self.Names:
        row = [row_name]
        for col_name in self.Names:
            if row_name == col_name:
                row.append('')
                continue

            val = self._dists[(row_name, col_name)][stat]
            if transform is not None:
                val = transform(val)
            row.append(val)
        rows.append(row)

    header = [r'Seq1 \ Seq2'] + self.Names
    table = LoadTable(header=header, rows=rows, row_ids=True,
                      missing_data='*', **kwargs)
    return table
import sys

sys.path.extend(['..'])

from cogent import LoadTable
from cogent.util.unit_test import TestCase, main
from cogent.util.misc import remove_files

from chippy.parse.expr_data import _check_expr_headers, _check_diff_headers,\
    _validate_probes_scores, _remove_multimapped_probesets, _read_data_file

_sample_dump = LoadTable(header=['ENSEMBL', 'probeset', 'exp'],
    rows=[['id1', "0|1|2", "13.6|13.4|13.6"],
          ['id2', "3|1", "9.9|13.6"],  # this gene should be lost when filtered
          ['id3', "4|5", "12.7|13.4"],
          ['id4', "6", "13.4"],
          ['id5', "7|8|3", "6.0|6.0|4.5"],
          ['id6', "9|10|11|12", "5.4|6.8|6.6|6.2"],
          ['id8', "13", "12.7"],
          ['id9', "14", "12.7"],
          ['id10', "15", "12.7"]])

class TestExprParsing(TestCase):
    """test that excluding probesets works correctly"""

    def test_check_expr_headers(self):
        """check that headers are identified correctly, as is the presence/
        absence of a probeset column label. Make sure it fails if the
        columns are incorrectly ordered or labelled.
        """
        header_row = ['ENSEMBL', 'probeset', 'exp']
        gene_col, probe_col, exp_col, probes_present = _check_expr_headers(
            header_row)
def run_asr_in_parallel(tree, table, asr_method, parallel_method='sge',
        tmp_dir='jobs/', num_jobs=100, verbose=False):
    '''Runs the ancestral state reconstructions in parallel'''
    asr_script_fp = join(get_picrust_project_dir(), 'scripts',
                         'ancestral_state_reconstruction.py')

    if parallel_method == 'sge':
        cluster_jobs_fp = join(get_picrust_project_dir(), 'scripts',
                               'start_parallel_jobs_sge.py')
    elif parallel_method == 'multithreaded':
        cluster_jobs_fp = join(get_picrust_project_dir(), 'scripts',
                               'start_parallel_jobs.py')
    elif parallel_method == 'torque':
        cluster_jobs_fp = join(get_picrust_project_dir(), 'scripts',
                               'start_parallel_jobs_torque.py')
    else:
        raise RuntimeError("Unknown parallel method: %s" % parallel_method)

    if verbose:
        print "Loading trait table..."

    # for each trait in the table, create a new tmp file with just that
    # trait, create the job command, and add it to a tmp jobs file
    table = LoadTable(filename=table, header=True, sep='\t')

    # get dimensions of the table
    dim = table.Shape

    created_tmp_files = []
    output_files = []
    ci_files = []

    # create a tmp file to store the job commands (which we will pass to
    # our parallel script to run)
    jobs_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='jobs_asr_')
    jobs = open(jobs_fp, 'w')
    created_tmp_files.append(jobs_fp)

    if verbose:
        print "Creating temporary input files in: ", tmp_dir

    # iterate over each column
    for i in range(1, dim[1]):
        # create a new table with only a single trait
        single_col_table = table.getColumns([0, i])

        # write the new table to a tmp file
        single_col_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='in_asr_')
        single_col_table.writeToFile(single_col_fp, sep='\t')
        created_tmp_files.append(single_col_fp)

        # create tmp output files
        tmp_output_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='out_asr_')
        output_files.append(tmp_output_fp)
        tmp_ci_fp = get_tmp_filename(tmp_dir=tmp_dir, prefix='out_asr_ci_')
        ci_files.append(tmp_ci_fp)

        # create the job command
        cmd = "{0} -i {1} -t {2} -m {3} -o {4} -c {5}".format(
            asr_script_fp, single_col_fp, tree, asr_method, tmp_output_fp,
            tmp_ci_fp)

        # add the job command to the jobs file
        jobs.write(cmd + "\n")

    jobs.close()
    created_tmp_files.extend(output_files)
    created_tmp_files.extend(ci_files)

    if verbose:
        print "Launching parallel jobs."

    # run the job commands
    job_prefix = 'asr'
    submit_jobs(cluster_jobs_fp, jobs_fp, job_prefix, num_jobs=num_jobs)

    if verbose:
        print "Jobs are now running. Will wait until finished."

    # wait until all jobs have finished (e.g. simple poller)
    wait_for_output_files(output_files)

    if verbose:
        print "Jobs are done running. Now combining all tmp files."

    # combine output files
    combined_table = combine_asr_tables(output_files)
    combined_ci_table = combine_asr_tables(ci_files)

    # create Table objects
    combined_table = Table(header=combined_table[0], rows=combined_table[1:])
    combined_ci_table = Table(header=combined_ci_table[0],
                              rows=combined_ci_table[1:])

    # clean up all tmp files
    for f in created_tmp_files:
        remove(f)

    # return the combined tables
    return combined_table, combined_ci_table
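# Usage sketch with hypothetical inputs; asr_method is passed straight
# through to ancestral_state_reconstruction.py's -m option (the exact
# method names accepted there are assumed, not shown here).
def _example_run_asr_in_parallel():
    counts, cis = run_asr_in_parallel('reference_tree.newick',
                                      'trait_table.tab', 'ace_pic',
                                      parallel_method='sge', num_jobs=50)
    print counts.Shape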
def loadtable(header, rows):
    from cogent import LoadTable
    return LoadTable(header=header, rows=rows)
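# Trivial usage sketch: the wrapper just forwards to cogent's LoadTable.
def _example_loadtable():
    t = loadtable(['name', 'count'], [['a', 1], ['b', 2]])
    print t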