Example #1
0
def main():
    """Run the alignment-free homology harness from the command line.

    Parses the shared field-number options plus a model sub-command
    (``d2z`` or ``hexmcd``), reads the input rows via ``read_fields``,
    builds the selected model from the parsed options and passes the
    options, model and rows to ``row_search``.

    Exits via ``parser.error`` (usage message, exit status 2) when no
    sub-command is given or when the given one has no model wired up here.
    """
    import argparse
    # The text is passed as ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, which would put this sentence in place of the
    # program name in every usage/error message.
    parser = argparse.ArgumentParser(description='Harness for alignment free homology.',
                                     add_help=False, epilog='')
    parser.add_argument('--input', type=argparse.FileType('rb'), default=sys.stdin,
                        help='Input file e.g. hg18.toDanRer5.seqs.txt.')
    parser.add_argument('-a', type=int, default=6, help='Field number of A (training) sequences.')
    parser.add_argument('-b', type=int, default=8, help='Field number of B (test) sequences.')
    parser.add_argument('-c', type=int, default=5, help='Field number of test coordinates.')
    parser.add_argument('--valid', type=int, default=9, help='Field number of valid test coordinates.')
    parser.add_argument('-l', type=int, default=None, help='Length of scanning window. Defaults to the average of training sequences.')

    subparsers = parser.add_subparsers(help='Model algorithm to use.', dest='model')
    d2z_parser = subparsers.add_parser('d2z', help='D2z scoring metric.')
    d2z_parser = add_d2z_arguments(d2z_parser)
    hexmcd_parser = subparsers.add_parser('hexmcd', help='HexMCD algorithm.')
    hexmcd_parser = add_hexmcd_arguments(hexmcd_parser)
    # Add more parsers here.

    OPTS = parser.parse_args()
    # Sub-commands are optional on Python 3, in which case ``model`` is None;
    # stop with a usage error before reading any input.
    if OPTS.model is None:
        parser.error('a model sub-command is required (d2z or hexmcd).')
    line_tups = read_fields(f=OPTS.input)

    # Every parsed option is forwarded to the model constructor as a keyword.
    if OPTS.model == 'd2z':
        m = D2z(**vars(OPTS))
    elif OPTS.model == 'hexmcd':
        # NOTE(review): the background list is read from the hard-coded index 6
        # rather than from one of the field-number options -- confirm intended.
        m = HexMCD(bg_list=[row[6] for row in line_tups], smoothing='ones', **vars(OPTS))
    else:
        # A sub-command was registered above without a matching model branch.
        parser.error('no model is implemented for sub-command %r.' % OPTS.model)
    row_search(OPTS, m, line_tups)
Example #2
0
def main():
    """Plot one CNE's parsed data alongside its valid index range.

    Reads the data file (``-f``/``--file``, stdin by default) through
    ``read_fields`` and ``parse_dat``, then walks the extra-data file to
    build, for every row, the valid start/end offsets relative to the start
    of the coordinates in the fifth field.  Finally calls ``plot_cne`` for
    the identifier given on the command line.

    Raises ``KeyError`` if the identifier is missing from either table.
    """
    import argparse
    cli = argparse.ArgumentParser()
    cli.add_argument('identifier', help='Identifier name e.g. cne.100899.FST.')
    cli.add_argument('-f', '--file', nargs='?', type=argparse.FileType('r'),
                     default=sys.stdin, help='Input file e.g. d2z.dat.')
    cli.add_argument('--extra_data', type=argparse.FileType('r'),
                     default='hg18.toDanRer5.seqs.txt',
                     help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')
    cli.add_argument('--valid', type=int, default=9,
                     help='Field number of valid test coordinates.')
    args = cli.parse_args()

    scores_by_cne = parse_dat(read_fields(f=args.file))

    # Field numbers on the command line are 1-based; row indices are 0-based.
    valid_field = args.valid - 1
    valid_ranges = {}
    for row in read_fields(f=args.extra_data):
        region = parse_coords(row[4])
        valid = parse_coords(row[valid_field])
        origin = region['start']
        # Express the valid interval as offsets from the region start.
        valid_ranges[row[0]] = (valid['start'] - origin, valid['end'] - origin)

    wanted = args.identifier
    plot_cne(wanted, scores_by_cne[wanted], valid_ranges[wanted])
Example #3
0
def main():
  """Score parsed CNE data and print one tab-separated line per CNE.

  Reads the data file (``-f``/``--file``, stdin by default) and the
  extra-data file, runs the scoring function selected by the positional
  ``scoring`` argument (``ranked_peaks`` or ``overlap``) and writes, for
  each CNE in sorted key order, the key followed by the ``'rank'`` and
  ``'places'`` entries of its result to stdout.
  """
  import argparse
  parser = argparse.ArgumentParser()
  parser.add_argument('-f', '--file', nargs='?', type=argparse.FileType('r'), default=sys.stdin,
                      help='Input file e.g. d2z.dat.')
  parser.add_argument('--extra_data', type=argparse.FileType('r'), default='hg18.toDanRer5.seqs.txt',
                      help='Extra data file e.g. hg18.toDanRer5.seqs.txt.')
  parser.add_argument('scoring', choices=['ranked_peaks', 'overlap'])
  OPTS = parser.parse_args()
  line_tups = read_fields(f=OPTS.file)
  cne_dict = parse_dat(line_tups)
  extra = parse_extra_data(read_fields(f=OPTS.extra_data))
  # ``choices`` above guarantees that one of these two branches is taken.
  if OPTS.scoring == 'ranked_peaks':
    results = ranked_peaks(cne_dict, extra)
  elif OPTS.scoring == 'overlap':
    results = overlap(cne_dict, extra)
  # ``items()`` instead of the Python-2-only ``iteritems()``: same sorted
  # output on Python 2, and no AttributeError on Python 3.
  for cne, result in sorted(results.items()):
    sys.stdout.write('\t'.join([cne, str(result['rank']),
                                str(result['places'])]) + '\n')
Example #4
0
def main():
    """Fit a D2z model on the A sequences and print a score per row.

    Builds the command-line parser through ``add_d2z_arguments``, reads the
    rows with ``read_fields``, fits ``D2z`` on the field selected by ``-a``
    and then, for every row, writes the first field and the first value
    returned by ``scan`` for the ``-b`` field to stdout, tab-separated.
    """
    import argparse
    cli = add_d2z_arguments(argparse.ArgumentParser(description='Compute d2z scores.'),
                            main=True)
    args = cli.parse_args()
    rows = read_fields()

    # Field numbers on the command line are 1-based; row indices are 0-based.
    training = [row[args.a - 1] for row in rows]
    model = D2z()
    model.fit(training)

    for row in rows:
        label, candidate = row[0], row[args.b - 1]
        # scan() takes a list of sequences; only one is passed, so take [0].
        score = model.scan([candidate])[0]
        sys.stdout.write('%s\t%s\n' % (label, score))