parser.add_argument('-o', '--out-dir', help='Output directory') args = parser.parse_args() if args.out_dir: if not os.path.exists(args.out_dir): os.makedirs(args.out_dir) elif not os.path.isdir(args.out_dir): raise IOError('Output path exists and is not a directory') if args.verbose: print 'Getting sequence index' seq_index = SeqIO.index(args.fasta, 'fasta') if args.verbose: print 'Reading clustering' tt = ttable.read_mcl(args.clustering) cl2seq = tt.invert() if len(args.cid_list) > 0: cid_list = args.cid_list else: cid_list = [ci for ci in cl2seq] for ci in cid_list: if args.verbose: print 'Collecting sequences for cluster {0}'.format(ci) seqs = [] for si in cl2seq[int(ci)]:
# NOTE(review): this excerpt opens mid-call -- the line below starts with the
# trailing keyword arguments of a call whose opening is not visible here -- and
# its statements have been collapsed onto a single line.
# Visible flow, in order: register the 'truth', 'pred' and optional 'output'
# arguments and parse the command line; read the truth table with
# tt.read_truth(args.truth[0], args.min_score) and the prediction with
# tt.read_mcl(args.pred[0]); cross-tabulate their hard() forms via tt.crosstab;
# report ct.shape[0] * ct.shape[1] as the element count and print the table
# with print_table; finally test over_clustered(ct), after which
# add_padding_columns(ct) is called and the table is printed again.
# Which trailing statements belong to the over_clustered() branch cannot be
# determined from the collapsed text -- TODO confirm against the original file.
# The 'output' argument is parsed but not used within this excerpt.
type=int, help='minimum truth object score', default=0) parser.add_argument('truth', nargs=1, help='truth table in yaml format') parser.add_argument('pred', nargs=1, help='prediction in MCL format') parser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, help='Output file') args = parser.parse_args() print 'Reading truth table...' truth = tt.read_truth(args.truth[0], args.min_score) print 'Reading prediction...' pred = tt.read_mcl(args.pred[0]) print 'Creating contingency table...' ct = tt.crosstab(truth.hard(), pred.hard()) print print 'Contigency table [rows=truth, cols=prediction] contains {0} elements'.format( ct.shape[0] * ct.shape[1]) print_table(ct) print if over_clustered(ct): add_padding_columns(ct) print 'Squaring table with dummy classes' print_table(ct) print
index=['Sum'] + df.index.values.tolist()) if __name__ == '__main__': parser = argparse.ArgumentParser(description='Calculate F1 metric') parser.add_argument('-s', '--min-score', type=int, help='minimum truth object score', default=0) parser.add_argument('truth', nargs=1, help='truth table in yaml format') parser.add_argument('pred', nargs=1, help='prediction in MCL format') parser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, help='Output file') args = parser.parse_args() print 'Reading truth table...' truth = tt.read_truth(args.truth[0], args.min_score) print 'Reading prediction...' pred = tt.read_mcl(args.pred[0]) print 'Creating contingency table...' ct = tt.crosstab(truth.hard(), pred.hard()) print print 'Contigency table [rows=truth, cols=prediction] contains {0} elements'.format(ct.shape[0] * ct.shape[1]) print_table(ct) print if over_clustered(ct): add_padding_columns(ct) print 'Squaring table with dummy classes' print_table(ct) print
# NOTE(review): this excerpt opens part-way through a ``try`` block (its
# ``except RuntimeWarning`` clause appears further along the line) and the
# statements have been collapsed onto one line, so the nesting cannot be
# recovered here; the condition guarding the first ``raise`` is not visible.
# Visible flow, in order: raise RuntimeWarning naming args.truth[0] for a truth
# table with no assignments; set weights to truth.get_weights() when
# args.weighted is set, otherwise None; under args.verbose print a
# 'Truth Statistics' heading (truth.print_tally() follows it); replace truth
# with truth.hard(True) when args.hard is set, otherwise truth.soft(True); read
# the clustering with tt.read_mcl(args.pred) and raise RuntimeWarning when
# len(clustering) == 0; under args.verbose print a 'Clustering Statistics'
# heading (clustering.print_tally() follows it); replace clustering with
# clustering.soft(True).
# A caught RuntimeWarning is reported through write_msg(args.output, ...) and
# the script then exits with status 0. Otherwise the value returned by
# bcubed_F(truth, clustering, weights) is written to args.output with
# pipeline_utils.write_to_stream.
# NOTE(review): read_mcl receives args.pred, while the error message indexes
# args.pred[0] and the truth message indexes args.truth[0] -- confirm whether
# args.pred is a list that should be indexed in the read_mcl call as well.
# NOTE(review): wn.message relies on BaseException.message, which is deprecated
# since Python 2.6 and does not exist in Python 3.
raise RuntimeWarning('Truth table contains no assignments: {0}'.format(args.truth[0])) # collect object weights if requested weights = truth.get_weights() if args.weighted else None if args.verbose: print 'Truth Statistics' truth.print_tally() if args.hard: truth = truth.hard(True) else: truth = truth.soft(True) # read clustering and convert to basic soft table clustering = tt.read_mcl(args.pred) if len(clustering) == 0: raise RuntimeWarning('Clustering contains no assignments: {0}'.format(args.pred[0])) if args.verbose: print 'Clustering Statistics' clustering.print_tally() clustering = clustering.soft(True) except RuntimeWarning as wn: write_msg(args.output, wn.message) sys.exit(0) result = bcubed_F(truth, clustering, weights) pipeline_utils.write_to_stream(args.output, result)