Exemple #1
0
parser.add_argument('-o', '--out-dir', help='Output directory')
args = parser.parse_args()

if args.out_dir:
    if not os.path.exists(args.out_dir):
        os.makedirs(args.out_dir)
    elif not os.path.isdir(args.out_dir):
        raise IOError('Output path exists and is not a directory')

if args.verbose:
    print 'Getting sequence index'
seq_index = SeqIO.index(args.fasta, 'fasta')

if args.verbose:
    print 'Reading clustering'
tt = ttable.read_mcl(args.clustering)
cl2seq = tt.invert()


if len(args.cid_list) > 0:
    cid_list = args.cid_list
else:
    cid_list = [ci for ci in cl2seq]

for ci in cid_list:

    if args.verbose:
        print 'Collecting sequences for cluster {0}'.format(ci)

    seqs = []
    for si in cl2seq[int(ci)]:
Exemple #2
0
                        type=int,
                        help='minimum truth object score',
                        default=0)
    parser.add_argument('truth', nargs=1, help='truth table in yaml format')
    parser.add_argument('pred', nargs=1, help='prediction in MCL format')
    parser.add_argument('output',
                        nargs='?',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help='Output file')
    args = parser.parse_args()

    print 'Reading truth table...'
    truth = tt.read_truth(args.truth[0], args.min_score)
    print 'Reading prediction...'
    pred = tt.read_mcl(args.pred[0])

    print 'Creating contingency table...'
    ct = tt.crosstab(truth.hard(), pred.hard())

    print
    print 'Contigency table [rows=truth, cols=prediction] contains {0} elements'.format(
        ct.shape[0] * ct.shape[1])
    print_table(ct)
    print

    if over_clustered(ct):
        add_padding_columns(ct)
        print 'Squaring table with dummy classes'
        print_table(ct)
        print
Exemple #3
0
                       index=['Sum'] + df.index.values.tolist())


if __name__ == '__main__':

    parser = argparse.ArgumentParser(description='Calculate F1 metric')
    parser.add_argument('-s', '--min-score', type=int, help='minimum truth object score', default=0)
    parser.add_argument('truth', nargs=1, help='truth table in yaml format')
    parser.add_argument('pred', nargs=1, help='prediction in MCL format')
    parser.add_argument('output', nargs='?', type=argparse.FileType('w'), default=sys.stdout, help='Output file')
    args = parser.parse_args()

    print 'Reading truth table...'
    truth = tt.read_truth(args.truth[0], args.min_score)
    print 'Reading prediction...'
    pred = tt.read_mcl(args.pred[0])

    print 'Creating contingency table...'
    ct = tt.crosstab(truth.hard(), pred.hard())

    print
    print 'Contigency table [rows=truth, cols=prediction] contains {0} elements'.format(ct.shape[0] * ct.shape[1])
    print_table(ct)
    print

    if over_clustered(ct):
        add_padding_columns(ct)
        print 'Squaring table with dummy classes'
        print_table(ct)
        print
Exemple #4
0
            raise RuntimeWarning('Truth table contains no assignments: {0}'.format(args.truth[0]))

        # collect object weights if requested; they are taken from the table
        # as read, before `truth` is rebound to its hard/soft form below
        weights = truth.get_weights() if args.weighted else None

        if args.verbose:
            print 'Truth Statistics'
            truth.print_tally()

        # Rebind `truth` to the representation selected by args.hard.
        # NOTE(review): the meaning of the True argument passed to hard()/soft()
        # is not visible here -- confirm against the table class.
        if args.hard:
            truth = truth.hard(True)
        else:
            truth = truth.soft(True)

        # read clustering and convert to basic soft table
        # NOTE(review): read_mcl() is given args.pred itself, while the error
        # message below formats args.pred[0]. Depending on how the 'pred'
        # argument is declared (not visible here) one of the two is probably
        # wrong -- confirm.
        clustering = tt.read_mcl(args.pred)
        if len(clustering) == 0:
            raise RuntimeWarning('Clustering contains no assignments: {0}'.format(args.pred[0]))

        if args.verbose:
            print 'Clustering Statistics'
            clustering.print_tally()
        clustering = clustering.soft(True)

    # Any RuntimeWarning raised in the try body (such as the empty-clustering
    # check above) is passed to write_msg() together with args.output, and
    # the process then exits with status 0.
    # NOTE(review): `wn.message` exists only on Python 2 exceptions.
    except RuntimeWarning as wn:
        write_msg(args.output, wn.message)
        sys.exit(0)

    # Score the clustering against the truth (with the optional weights) and
    # write the result to args.output.
    result = bcubed_F(truth, clustering, weights)
    pipeline_utils.write_to_stream(args.output, result)