Esempio n. 1
0
def view(args):
  """Load a graph and run whichever inspection/export actions `args` selects.

  The graph is read with `load_from_fasta_tsv` from `args.fasta`,
  `args.edges` and `args.containment`. Each of the following attributes
  is then checked for truthiness and, when set, triggers one action:

    check_correctness -- call `visualize_correctness` on the graph
    vertex            -- look the vertex up by id and pass it to `print_vertex`
    edge              -- fetch the edge and pass it to `print_connection`
    dot               -- call `to_graphviz_dot` with this value
    gfa               -- call `save_bandage_gfa` with this value
  """
  graph = load_from_fasta_tsv(args.fasta, args.edges, args.containment)

  if args.check_correctness:
    visualize_correctness(graph)

  # Single-element inspection.
  if args.vertex:
    selected_vertex = graph.vertex_from_id(args.vertex)
    print_vertex(selected_vertex)
  if args.edge:
    selected_edge = graph.get_edge(args.edge)
    print_connection(selected_edge)

  # Whole-graph exports.
  if args.dot:
    to_graphviz_dot(graph, args.dot)
  if args.gfa:
    save_bandage_gfa(graph, args.gfa)
Esempio n. 2
0
def view(args):
    """Inspect or export a graph according to the options in `args`.

    Reads the graph through `load_from_fasta_tsv(args.fasta, args.edges,
    args.containment)` and then performs, in this order, every action
    whose option is truthy:

    * `args.check_correctness`: `visualize_correctness(graph)`
    * `args.vertex`: `print_vertex` on the vertex with that id
    * `args.edge`: `print_connection` on the edge returned by `get_edge`
    * `args.dot`: `to_graphviz_dot(graph, args.dot)`
    * `args.gfa`: `save_bandage_gfa(graph, args.gfa)`
    """
    graph = load_from_fasta_tsv(args.fasta, args.edges, args.containment)

    if args.check_correctness:
        visualize_correctness(graph)

    # Print a single vertex and/or edge when one was requested.
    if args.vertex:
        vertex = graph.vertex_from_id(args.vertex)
        print_vertex(vertex)
    if args.edge:
        connection = graph.get_edge(args.edge)
        print_connection(connection)

    # Write the graph out in the requested formats.
    if args.dot:
        to_graphviz_dot(graph, args.dot)
    if args.gfa:
        save_bandage_gfa(graph, args.gfa)
Esempio n. 3
0
def scaffold(args):
  """Run the scaffolding pipeline on the graph described by `args`.

  Steps, in order:
    1. Load the graph with `load_from_fasta_tsv`.
    2. If `args.min_ctg_len` is set, remove every vertex whose `seq` is
       shorter than that length.
    3. If the graph has edges: contract paths, write `contracted.fasta`
       (fixed name, relative to the working directory), optionally cut
       tips (`args.cut_tip_len`), prune edges with
       `prune_scaffold_edges` and `prune_scaffold_edges_via_wells`, and
       contract again.
    4. Remove all remaining edges and create new ones with
       `make_wellscaff_edges`.
    5. Save the intermediate graph as `<args.out>.wellscaff.{fasta,tsv,
       containment}`.
    6. Prune with `prune_via_wells`, contract, and write
       `<args.out>.fasta` and `<args.out>.ordering`.

  Args:
    args: object exposing the attributes `fasta`, `edges`, `containment`,
      `min_ctg_len`, `cut_tip_len`, `pe_abs_thr`, `pe_rel_thr`,
      `pe_rc_rel_thr`, `rc_abs_thr`, `rc_rel_edge_thr`,
      `rc_rel_prun_thr` and `out`.
  """
  logging.info('Creating the scaffold graph')
  g = load_from_fasta_tsv(args.fasta, args.edges, args.containment)
  print_stats(g)

  # delete small vertices
  if args.min_ctg_len:
    logging.info('Removing vertices smaller than %d bp', args.min_ctg_len)
    n_removed = 0
    # Iterate over a snapshot: removing vertices while walking the graph's
    # own container is unsafe if `g.vertices` is a live view.
    for v in list(g.vertices):
      if len(v.seq) < args.min_ctg_len:
        g.remove_vertex(v)
        n_removed += 1
    # Report the total once, after the loop, instead of once per removal.
    logging.info('Removed %d vertices', n_removed)
    print_stats(g)

  # prune scaffold edges
  if g.edges:
    logging.info('Simplifying the graph using paired-end reads')
    logging.info('Contracting unambigous paths')
    contract_edges(g, store_ordering=True)
    print_stats(g)
    save_fasta(g, 'contracted.fasta')

    if args.cut_tip_len:
      n_cut = cut_tips(g, d=args.cut_tip_len)
      logging.info('Cut %d tips shorter than %d bp', n_cut, args.cut_tip_len)

    logging.info('Pruning edges with low support')
    n_pruned1 = prune_scaffold_edges(g, abs_support_thr=args.pe_abs_thr,
                                        rel_support_thr=args.pe_rel_thr)
    n_pruned2 = prune_scaffold_edges_via_wells(g, thr=args.pe_rc_rel_thr)
    logging.info('%d edges pruned', n_pruned1 + n_pruned2)

    logging.info('Contracting unambigous paths')
    contract_edges(g)
    print_stats(g)

  # delete all existing edges from the graph
  # (snapshot first, for the same reason as the vertex loop above)
  for e in list(g.edges):
    g.remove_edge(e)

  # create new edges whenever vertices have similar well profiles
  logging.info('Creating edges from read clouds')
  n_edges = make_wellscaff_edges(g, min_common=args.rc_abs_thr,
                                    min_thr=args.rc_rel_edge_thr)
  logging.info('%d scaffold edges from read clouds', n_edges)

  logging.info('Auto-saving graph with prefix %s.wellscaff', args.out)
  save_to_fasta_tsv(g, '%s.wellscaff.fasta' % args.out,
                       '%s.wellscaff.tsv' % args.out,
                       '%s.wellscaff.containment' % args.out)

  logging.info('Pruning edges with low support')
  n_pruned = prune_via_wells(g, min_common=args.rc_abs_thr,
                                min_thr=args.rc_rel_prun_thr)
  logging.info('%d edges pruned', n_pruned)

  logging.info('Contracting unambigous paths')
  contract_edges(g, store_ordering=True)
  print_stats(g)

  logging.info('Saving scaffolding results')
  save_fasta(g, '%s.fasta' % args.out)
  save_ordering(g, '%s.ordering' % args.out)
Esempio n. 4
0
def scaffold(args):
    """Run the scaffolding pipeline on the graph described by `args`.

    Steps, in order:
      1. Load the graph with `load_from_fasta_tsv`.
      2. If `args.min_ctg_len` is set, remove every vertex whose `seq` is
         shorter than that length.
      3. If the graph has edges: contract paths, write `contracted.fasta`
         (fixed name, relative to the working directory), optionally cut
         tips (`args.cut_tip_len`), prune edges with
         `prune_scaffold_edges` and `prune_scaffold_edges_via_wells`,
         and contract again.
      4. Remove all remaining edges and create new ones with
         `make_wellscaff_edges`.
      5. Save the intermediate graph as `<args.out>.wellscaff.{fasta,tsv,
         containment}`.
      6. Prune with `prune_via_wells`, contract, and write
         `<args.out>.fasta` and `<args.out>.ordering`.

    Args:
        args: object exposing the attributes `fasta`, `edges`,
            `containment`, `min_ctg_len`, `cut_tip_len`, `pe_abs_thr`,
            `pe_rel_thr`, `pe_rc_rel_thr`, `rc_abs_thr`,
            `rc_rel_edge_thr`, `rc_rel_prun_thr` and `out`.
    """
    logging.info('Creating the scaffold graph')
    g = load_from_fasta_tsv(args.fasta, args.edges, args.containment)
    print_stats(g)

    # delete small vertices
    if args.min_ctg_len:
        logging.info('Removing vertices smaller than %d bp', args.min_ctg_len)
        n_removed = 0
        # Iterate over a snapshot: removing vertices while walking the
        # graph's own container is unsafe if `g.vertices` is a live view.
        for v in list(g.vertices):
            if len(v.seq) < args.min_ctg_len:
                g.remove_vertex(v)
                n_removed += 1
        # Report the total once, after the loop, instead of once per removal.
        logging.info('Removed %d vertices', n_removed)
        print_stats(g)

    # prune scaffold edges
    if g.edges:
        logging.info('Simplifying the graph using paired-end reads')
        logging.info('Contracting unambigous paths')
        contract_edges(g, store_ordering=True)
        print_stats(g)
        save_fasta(g, 'contracted.fasta')

        if args.cut_tip_len:
            n_cut = cut_tips(g, d=args.cut_tip_len)
            logging.info('Cut %d tips shorter than %d bp',
                         n_cut, args.cut_tip_len)

        logging.info('Pruning edges with low support')
        n_pruned1 = prune_scaffold_edges(g,
                                         abs_support_thr=args.pe_abs_thr,
                                         rel_support_thr=args.pe_rel_thr)
        n_pruned2 = prune_scaffold_edges_via_wells(g, thr=args.pe_rc_rel_thr)
        logging.info('%d edges pruned', n_pruned1 + n_pruned2)

        logging.info('Contracting unambigous paths')
        contract_edges(g)
        print_stats(g)

    # delete all existing edges from the graph
    # (snapshot first, for the same reason as the vertex loop above)
    for e in list(g.edges):
        g.remove_edge(e)

    # create new edges whenever vertices have similar well profiles
    logging.info('Creating edges from read clouds')
    n_edges = make_wellscaff_edges(g,
                                   min_common=args.rc_abs_thr,
                                   min_thr=args.rc_rel_edge_thr)
    logging.info('%d scaffold edges from read clouds', n_edges)

    logging.info('Auto-saving graph with prefix %s.wellscaff', args.out)
    save_to_fasta_tsv(g, '%s.wellscaff.fasta' % args.out,
                      '%s.wellscaff.tsv' % args.out,
                      '%s.wellscaff.containment' % args.out)

    logging.info('Pruning edges with low support')
    n_pruned = prune_via_wells(g,
                               min_common=args.rc_abs_thr,
                               min_thr=args.rc_rel_prun_thr)
    logging.info('%d edges pruned', n_pruned)

    logging.info('Contracting unambigous paths')
    contract_edges(g, store_ordering=True)
    print_stats(g)

    logging.info('Saving scaffolding results')
    save_fasta(g, '%s.fasta' % args.out)
    save_ordering(g, '%s.ordering' % args.out)