def getArgParser():
    """
    Builds the command line parser for germline assignment

    Arguments:
    None

    Returns:
    an ArgumentParser object that inherits the arguments of the parent
    parser returned by getCommonArgParser and adds the --version, -r, -g,
    --cloned, --vf and --sf options
    """
    # Epilog shown after the argument help: output files and database fields
    epilog_text = dedent(
             '''
             output files:
                 germ-pass
                    database with assigned germline sequences.
                 germ-fail
                    database with records failing germline assignment.

             required fields:
                 SEQUENCE_ID, SEQUENCE_INPUT, SEQUENCE_VDJ or SEQUENCE_IMGT,
                 V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL,
                 V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_IMGT, V_GERM_LENGTH_IMGT,
                 D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH,
                 J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH

             optional fields:
                 CLONE

             output fields:
                 GERMLINE_VDJ, GERMLINE_VDJ_D_MASK, GERMLINE_VDJ_V_REGION,
                 GERMLINE_IMGT, GERMLINE_IMGT_D_MASK, GERMLINE_IMGT_V_REGION
              ''')

    # Arguments inherited from the common parent parser
    common_parent = getCommonArgParser(
        seq_in=False,
        seq_out=False,
        db_in=True,
        annotation=False,
    )

    # Top-level parser
    arg_parser = ArgumentParser(
        formatter_class=CommonHelpFormatter,
        parents=[common_parent],
        description=__doc__,
        epilog=epilog_text,
    )

    # Version banner; argparse substitutes %(prog)s when --version is requested
    version_text = '%(prog)s:' + ' %s-%s' % (__version__, __date__)
    arg_parser.add_argument('--version', version=version_text, action='version')

    # Germline repository locations; at least one value is mandatory
    arg_parser.add_argument(
        '-r',
        dest='repo',
        action='store',
        nargs='+',
        required=True,
        help='List of folders and/or fasta files with germline sequences.',
    )

    # Germline types to generate. The continuation line of the help text is
    # part of the string literal, so its indentation must not be changed.
    arg_parser.add_argument(
        '-g',
        dest='germ_types',
        action='store',
        nargs='+',
        choices=('full', 'dmask', 'vonly'),
        default=default_germ_types,
        help='Specify type(s) of germlines to include full germline, \
                              germline with D-region masked, or germline for V region only.',
    )

    # Flag requesting a single germline per clone (same note on the
    # continuation line as above)
    arg_parser.add_argument(
        '--cloned',
        dest='cloned',
        action='store_true',
        help='Specify to create only one germline per clone \
                             (assumes input file is sorted by clone column)',
    )

    # Names of the database fields holding the V call and the sequence
    arg_parser.add_argument(
        '--vf',
        dest='v_field',
        action='store',
        default=default_v_field,
        help='Specify field to use for germline V call',
    )
    arg_parser.add_argument(
        '--sf',
        dest='seq_field',
        action='store',
        default=default_seq_field,
        help='Specify field to use for sequence',
    )

    return arg_parser
# Example no. 2
# 0
def getArgParser():
    """
    Defines the ArgumentParser

    Builds a top-level parser with one required subcommand per database
    operation (fasta, clip, split, add, delete, drop, index, rename, select,
    sort and update). Every subcommand inherits the arguments of the parent
    parser returned by getCommonArgParser and stores the function that
    implements it as the 'func' default.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output field help message
    fields = dedent(
             '''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and clip.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 SEQUENCE_ID

             optional fields:
                 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
                 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
                 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION

             output fields:
                 None
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Database operation')
    # TODO:  This is a temporary fix for Python issue 9253
    # (marks the subcommand as mandatory)
    subparsers.required = True

    # Define parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       failed=False, log=False)

    # Subparser to convert database entries to sequence file
    parser_seq = subparsers.add_parser('fasta', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Creates a fasta file from database records')
    parser_seq.add_argument('--if', action='store', dest='id_field',
                            default=default_id_field,
                            help='The name of the field containing identifiers')
    parser_seq.add_argument('--sf', action='store', dest='seq_field',
                            default=default_seq_field,
                            help='The name of the field containing sequences')
    parser_seq.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                            help='List of annotation fields to add to the sequence description')
    parser_seq.set_defaults(func=convertDbFasta)

    # Subparser to convert database entries to clip-fasta file
    parser_clip = subparsers.add_parser('clip', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='''Creates a clip-fasta file from database
                                             records, wherein germline sequences precede
                                             each clone and are denoted by ">>" headers.''')
    parser_clip.add_argument('--if', action='store', dest='id_field',
                             default=default_id_field,
                             help='The name of the field containing identifiers')
    parser_clip.add_argument('--sf', action='store', dest='seq_field',
                             default=default_seq_field,
                             help='The name of the field containing reads')
    parser_clip.add_argument('--gf', action='store', dest='germ_field',
                             default=default_germ_field,
                             help='The name of the field containing germline sequences')
    # Help text fixed: the word "containing" was duplicated
    parser_clip.add_argument('--cf', action='store', dest='cluster_field', default=None,
                             help='The name of the field containing sorted clone IDs')
    parser_clip.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                             help='List of annotation fields to add to the sequence description')
    parser_clip.set_defaults(func=convertDbClip)

    # Subparser to partition files by annotation values
    parser_split = subparsers.add_parser('split', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Splits database files by field values')
    parser_split.add_argument('-f', action='store', dest='field', type=str, required=True,
                              help='Annotation field by which to split database files.')
    # Unlike the --num flag of the sort subcommand, --num here takes the
    # numeric threshold itself
    parser_split.add_argument('--num', action='store', dest='num_split', type=float, default=None,
                              help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.''')
    parser_split.set_defaults(func=splitDbFile)

    # Subparser to add fields
    parser_add = subparsers.add_parser('add', parents=[parser_parent],
                                       formatter_class=CommonHelpFormatter,
                                       help='Adds field and value pairs')
    parser_add.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                            help='The name of the fields to add.')
    parser_add.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                            help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser('delete', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Deletes specific records')
    parser_delete.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for deletion criteria.')
    # When -u is omitted the values '' and 'NA' are used
    parser_delete.add_argument('-u', nargs='+', action='store', dest='values', default=['', 'NA'],
                               help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.''')
    parser_delete.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_delete.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Deletes entire fields')
    parser_drop.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                             help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to add a numeric index field
    parser_index = subparsers.add_parser('index', parents=[parser_parent],
                                         formatter_class=CommonHelpFormatter,
                                         help='Adds a numeric index field')
    parser_index.add_argument('-f', action='store', dest='field',
                              default=default_index_field,
                              help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Renames fields')
    parser_rename.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='List of fields to rename.')
    parser_rename.add_argument('-k', nargs='+', action='store', dest='names', required=True,
                               help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records
    parser_select = subparsers.add_parser('select', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Selects specific records')
    parser_select.add_argument('-f', nargs='+', action='store', dest='fields', required=True,
                               help='The name of the fields to check for selection criteria.')
    # Help text fixed: "defining with records" -> "defining which records"
    parser_select.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.''')
    parser_select.add_argument('--logic', action='store', dest='logic',
                               choices=('any', 'all'), default='any',
                               help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).''')
    parser_select.add_argument('--regex', action='store_true', dest='regex',
                               help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort records
    parser_sort = subparsers.add_parser('sort', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Sorts records by field values')
    parser_sort.add_argument('-f', action='store', dest='field', type=str, required=True,
                             help='The annotation field by which to sort records.')
    parser_sort.add_argument('--num', action='store_true', dest='numeric', default=False,
                             help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    parser_sort.add_argument('--descend', action='store_true', dest='descend',
                             help='''If specified, sort records in descending, rather
                             than ascending, order by values in the target field.''')
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update field values
    parser_update = subparsers.add_parser('update', parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Updates field and value pairs')
    parser_update.add_argument('-f', action='store', dest='field', required=True,
                               help='The name of the field to update.')
    parser_update.add_argument('-u', nargs='+', action='store', dest='values', required=True,
                               help='The values that will be replaced.')
    parser_update.add_argument('-t', nargs='+', action='store', dest='updates', required=True,
                               help='''The new value to assign to each selected row.''')
    parser_update.set_defaults(func=updateDbFile)

    return parser
def getArgParser():
    """
    Defines the ArgumentParser

    Builds a top-level parser with a required subcommand selecting the
    cloning method (bygroup or hclust). Both subcommands inherit the
    arguments of the parent parser returned by getCommonArgParser and store
    the functions that implement them as defaults: feed_func, work_func and
    collect_func for both, plus group_func and clone_func for bygroup and
    cluster_func for hclust.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output fields
    fields = dedent(
             '''
             output files:
                 clone-pass
                     database with assigned clonal group numbers.
                 clone-fail
                     database with records failing clonal grouping.

             required fields:
                 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION_LENGTH

                 <field>
                     sequence field specified by the --sf parameter

             output fields:
                 CLONE
              ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' % (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Cloning method')
    # TODO:  This is a temporary fix for Python issue 9253
    # (marks the subcommand as mandatory)
    subparsers.required = True

    # Parent parser
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                       multiproc=True)

    # Distance cloning method
    parser_bygroup = subparsers.add_parser('bygroup', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='''Defines clones as having same V assignment,
                                              J assignment, and junction length with
                                              specified substitution distance model.''')
    parser_bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None,
                             help='Additional fields to use for grouping clones (non VDJ)')
    parser_bygroup.add_argument('--mode', action='store', dest='mode',
                             choices=('allele', 'gene'), default='gene',
                             help='''Specifies whether to use the V(D)J allele or gene for
                                  initial grouping.''')
    parser_bygroup.add_argument('--act', action='store', dest='action', default='set',
                             choices=('first', 'set'),
                             help='''Specifies how to handle multiple V(D)J assignments
                                  for initial grouping.''')
    # Help text fixed: "trasversion" -> "transversion"
    parser_bygroup.add_argument('--model', action='store', dest='model',
                             choices=('aa', 'ham', 'm1n', 'hs1f', 'hs5f'),
                             default=default_bygroup_model,
                             help='''Specifies which substitution model to use for
                                  calculating distance between sequences. Where m1n is the
                                  mouse single nucleotide transition/transversion model
                                  of Smith et al, 1996; hs1f is the human single
                                  nucleotide model derived from Yaari et al, 2013; hs5f
                                  is the human S5F model of Yaari et al, 2013; ham is
                                  nucleotide Hamming distance; and aa is amino acid
                                  Hamming distance. The hs5f data should be
                                  considered experimental.''')
    parser_bygroup.add_argument('--dist', action='store', dest='distance', type=float,
                             default=default_distance,
                             help='The distance threshold for clonal grouping')
    parser_bygroup.add_argument('--norm', action='store', dest='norm',
                             choices=('len', 'mut', 'none'), default=default_norm,
                             help='''Specifies how to normalize distances. One of none
                                  (do not normalize), len (normalize by length),
                                  or mut (normalize by number of mutations between sequences).''')
    parser_bygroup.add_argument('--sym', action='store', dest='sym',
                             choices=('avg', 'min'), default=default_sym,
                             help='''Specifies how to combine asymmetric distances. One of avg
                                  (average of A->B and B->A) or min (minimum of A->B and B->A).''')
    parser_bygroup.add_argument('--link', action='store', dest='linkage',
                             choices=('single', 'average', 'complete'), default=default_linkage,
                             help='''Type of linkage to use for hierarchical clustering.''')
    parser_bygroup.add_argument('--sf', action='store', dest='seq_field',
                                default=default_seq_field,
                                help='''The name of the field to be used to calculate
                                     distance between records''')
    # Functions attached to the parsed arguments when 'bygroup' is selected
    parser_bygroup.set_defaults(feed_func=feedQueue,
                                work_func=processQueue,
                                collect_func=collectQueue,
                                group_func=indexJunctions,
                                clone_func=distanceClones)

    # Hierarchical clustering cloning method
    # (the backslash continuations below are part of the help string literals)
    parser_hclust = subparsers.add_parser('hclust', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Defines clones by specified distance metric on CDR3s and \
                                              cutting of hierarchical clustering tree')
    parser_hclust.add_argument('--method', action='store', dest='method',
                             choices=('chen2010', 'ademokun2011'), default=default_hclust_model,
                             help='Specifies which cloning method to use for calculating distance \
                                   between CDR3s, computing linkage, and cutting clusters')
    # Functions attached to the parsed arguments when 'hclust' is selected
    parser_hclust.set_defaults(feed_func=feedQueueClust,
                               work_func=processQueueClust,
                               collect_func=collectQueueClust,
                               cluster_func=hierClust)

    return parser
def getArgParser():
    """
    Defines the ArgumentParser

    Builds a top-level parser with a required subcommand selecting the
    alignment method (across, within or block). Every subcommand inherits
    the arguments of the parent parser returned by getCommonArgParser and
    stores the grouping and alignment functions to use as the group_func
    and align_func defaults.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define output file names and header fields
    fields = dedent('''
             output files:
                 align-pass
                     database with multiple aligned sequences.
                 align-fail
                     database with records failing alignment.

             required fields:
                 sequence_id, v_call, j_call
                 <field>
                     user specified sequence fields to align.

             output fields:
                 <field>_align
             ''')

    # Define ArgumentParser
    # add_help=False because -h/--help is registered by hand in the 'help'
    # argument group below
    parser = ArgumentParser(description=__doc__,
                            epilog=fields,
                            formatter_class=CommonHelpFormatter,
                            add_help=False)
    group_help = parser.add_argument_group('help')
    group_help.add_argument('--version',
                            action='version',
                            version='%(prog)s:' + ' %s %s' %
                            (__version__, __date__))
    group_help.add_argument('-h',
                            '--help',
                            action='help',
                            help='show this help message and exit')
    subparsers = parser.add_subparsers(title='subcommands',
                                       dest='command',
                                       metavar='',
                                       help='alignment method')
    # TODO:  This is a temporary fix for Python issue 9253
    # (marks the subcommand as mandatory)
    subparsers.required = True

    # Parent parser
    parser_parent = getCommonArgParser(format=True, multiproc=True)

    # Argument parser for column-wise alignment across records
    parser_across = subparsers.add_parser(
        'across',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='''Multiple aligns sequence columns within groups 
                                                 and across rows using MUSCLE.'''
    )
    group_across = parser_across.add_argument_group('alignment arguments')
    _addGroupAlignArguments(group_across)
    parser_across.set_defaults(group_func=groupRecords, align_func=alignAcross)

    # Argument parser for alignment of fields within records
    parser_within = subparsers.add_parser(
        'within',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Multiple aligns sequence fields within rows using MUSCLE')
    group_within = parser_within.add_argument_group('alignment arguments')
    group_within.add_argument(
        '--sf',
        nargs='+',
        action='store',
        dest='seq_fields',
        required=True,
        help='The sequence fields to multiple align within each record.')
    _addMuscleExecArgument(group_within)
    # No grouping step is used for within-record alignment
    parser_within.set_defaults(group_func=None, align_func=alignWithin)

    # Argument parser for alignment across both columns and rows
    parser_block = subparsers.add_parser(
        'block',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='''Multiple aligns sequence groups across both 
                                             columns and rows using MUSCLE.''')
    group_block = parser_block.add_argument_group('alignment arguments')
    _addGroupAlignArguments(group_block)
    parser_block.set_defaults(group_func=groupRecords, align_func=alignBlocks)

    return parser


def _addMuscleExecArgument(group):
    """
    Adds the --exec option shared by every alignment subcommand

    Arguments:
    group : argument group (or parser) to add the option to

    Returns:
    None
    """
    group.add_argument('--exec',
                       action='store',
                       dest='muscle_exec',
                       default=default_muscle_exec,
                       help='The location of the MUSCLE executable')


def _addGroupAlignArguments(group):
    """
    Adds the options shared by the group-based subcommands (across and block)

    The options are added in the order --sf, --gf, --calls, --mode, --act
    and --exec.

    Arguments:
    group : argument group (or parser) to add the options to

    Returns:
    None
    """
    group.add_argument(
        '--sf',
        nargs='+',
        action='store',
        dest='seq_fields',
        required=True,
        help='The sequence fields to multiple align within each group.')
    group.add_argument(
        '--gf',
        nargs='+',
        action='store',
        dest='group_fields',
        default=None,
        help='Additional (not allele call) fields to use for grouping.')
    # A fresh default list is created on every call, so the subcommands
    # never share one mutable default
    group.add_argument(
        '--calls',
        nargs='+',
        action='store',
        dest='calls',
        choices=('v', 'd', 'j'),
        default=['v', 'j'],
        help='Segment calls (allele assignments) to use for grouping.')
    group.add_argument(
        '--mode',
        action='store',
        dest='mode',
        choices=('allele', 'gene'),
        default='gene',
        help='''Specifies whether to use the V(D)J allele or gene when
                                   an allele call field (--calls) is specified.'''
    )
    group.add_argument(
        '--act',
        action='store',
        dest='action',
        default='first',
        choices=('first', ),
        help='''Specifies how to handle multiple values within default
                                     allele call fields. Currently, only "first" is supported.'''
    )
    _addMuscleExecArgument(group)
# Example no. 5
# 0
def getArgParser():
    """
    Defines the ArgumentParser

    Builds the top-level parser and one subparser per database operation
    (add, delete, drop, index, rename, select, sort, update, merge and split).
    The selected subcommand name is stored in the 'command' attribute and
    every subparser registers its handler function under the 'func' default.

    Arguments:
      None

    Returns:
      an ArgumentParser object
    """
    # Define input and output field help message
    # NOTE(review): the 'sequences' entry refers to fasta and clip subcommands,
    # neither of which is defined by this parser; confirm whether the entry is stale.
    fields = dedent('''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and clip.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 sequence_id
             ''')

    # Define ArgumentParser
    # The automatic -h/--help is disabled so that it can be placed in its own
    # 'help' argument group together with --version.
    parser = ArgumentParser(description=__doc__,
                            epilog=fields,
                            formatter_class=CommonHelpFormatter,
                            add_help=False)
    group_help = parser.add_argument_group('help')
    group_help.add_argument('--version',
                            action='version',
                            version='%(prog)s:' + ' %s %s' %
                            (__version__, __date__))
    group_help.add_argument('-h',
                            '--help',
                            action='help',
                            help='show this help message and exit')
    subparsers = parser.add_subparsers(title='subcommands',
                                       dest='command',
                                       metavar='',
                                       help='Database operation')
    # TODO:  This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Define parent parsers
    # default_parent is shared by the single-file operations; multi_parent is
    # requested with out_file=False and is used by merge and split.
    default_parent = getCommonArgParser(failed=False, log=False, format=False)
    multi_parent = getCommonArgParser(out_file=False,
                                      failed=False,
                                      log=False,
                                      format=False)

    # Subparser to add records
    parser_add = subparsers.add_parser(
        'add',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Adds field and value pairs.',
        description='Adds field and value pairs.')
    group_add = parser_add.add_argument_group('parsing arguments')
    group_add.add_argument('-f',
                           nargs='+',
                           action='store',
                           dest='fields',
                           required=True,
                           help='The name of the fields to add.')
    group_add.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        required=True,
        help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser(
        'delete',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Deletes specific records.',
        description='Deletes specific records.')
    group_delete = parser_delete.add_argument_group('parsing arguments')
    group_delete.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to check for deletion criteria.')
    group_delete.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        default=['', 'NA'],
        help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.'''
    )
    group_delete.add_argument(
        '--logic',
        action='store',
        dest='logic',
        choices=('any', 'all'),
        default='any',
        help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).'''
    )
    group_delete.add_argument(
        '--regex',
        action='store_true',
        dest='regex',
        help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop',
                                        parents=[default_parent],
                                        formatter_class=CommonHelpFormatter,
                                        add_help=False,
                                        help='Deletes entire fields.',
                                        description='Deletes entire fields.')
    group_drop = parser_drop.add_argument_group('parsing arguments')
    group_drop.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to index fields
    parser_index = subparsers.add_parser(
        'index',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Adds a numeric index field.',
        description='Adds a numeric index field.')
    group_index = parser_index.add_argument_group('parsing arguments')
    group_index.add_argument(
        '-f',
        action='store',
        dest='field',
        default=default_index_field,
        help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename',
                                          parents=[default_parent],
                                          formatter_class=CommonHelpFormatter,
                                          add_help=False,
                                          help='Renames fields.',
                                          description='Renames fields.')
    group_rename = parser_rename.add_argument_group('parsing arguments')
    group_rename.add_argument('-f',
                              nargs='+',
                              action='store',
                              dest='fields',
                              required=True,
                              help='List of fields to rename.')
    group_rename.add_argument('-k',
                              nargs='+',
                              action='store',
                              dest='names',
                              required=True,
                              help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records
    parser_select = subparsers.add_parser(
        'select',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Selects specific records.',
        description='Selects specific records.')
    group_select = parser_select.add_argument_group('parsing arguments')
    group_select.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to check for selection criteria.')
    # Help text corrected from "defining with records" to match the wording
    # used by the delete subcommand.
    group_select.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        required=True,
        help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.'''
    )
    group_select.add_argument(
        '--logic',
        action='store',
        dest='logic',
        choices=('any', 'all'),
        default='any',
        help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).'''
    )
    group_select.add_argument(
        '--regex',
        action='store_true',
        dest='regex',
        help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort file by records
    parser_sort = subparsers.add_parser(
        'sort',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Sorts records by field values.',
        description='Sorts records by field values.')
    group_sort = parser_sort.add_argument_group('parsing arguments')
    group_sort.add_argument(
        '-f',
        action='store',
        dest='field',
        type=str,
        required=True,
        help='The annotation field by which to sort records.')
    group_sort.add_argument(
        '--num',
        action='store_true',
        dest='numeric',
        default=False,
        help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    group_sort.add_argument(
        '--descend',
        action='store_true',
        dest='descend',
        help='''If specified, sort records in descending, rather
                             than ascending, order by values in the target field.'''
    )
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update records
    parser_update = subparsers.add_parser(
        'update',
        parents=[default_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Updates field and value pairs.',
        description='Updates field and value pairs.')
    group_update = parser_update.add_argument_group('parsing arguments')
    group_update.add_argument('-f',
                              action='store',
                              dest='field',
                              required=True,
                              help='The name of the field to update.')
    group_update.add_argument('-u',
                              nargs='+',
                              action='store',
                              dest='values',
                              required=True,
                              help='The values that will be replaced.')
    group_update.add_argument(
        '-t',
        nargs='+',
        action='store',
        dest='updates',
        required=True,
        help='''The new value to assign to each selected row.''')
    parser_update.set_defaults(func=updateDbFile)

    # Subparser to merge files
    parser_merge = subparsers.add_parser('merge',
                                         parents=[multi_parent],
                                         formatter_class=CommonHelpFormatter,
                                         add_help=False,
                                         help='Merges files.',
                                         description='Merges files.')
    group_merge = parser_merge.add_argument_group('parsing arguments')
    # multi_parent is built with out_file=False, so merge declares its own -o
    group_merge.add_argument(
        '-o',
        action='store',
        dest='out_file',
        default=None,
        help=
        '''Explicit output file name. Note, this argument cannot be used with 
                                   the --failed, --outdir or --outname arguments.'''
    )
    group_merge.add_argument(
        '--drop',
        action='store_true',
        dest='drop',
        help='''If specified, drop fields that do not exist in all input files.
                                   Otherwise, include all columns in all files and fill missing data 
                                   with empty strings.''')
    parser_merge.set_defaults(func=mergeDbFiles)

    # Subparser to partition files by annotation values
    # The description now ends with a period, consistent with its help text
    # and with every other subcommand description.
    parser_split = subparsers.add_parser(
        'split',
        parents=[multi_parent],
        formatter_class=CommonHelpFormatter,
        add_help=False,
        help='Splits database files by field values.',
        description='Splits database files by field values.')
    group_split = parser_split.add_argument_group('parsing arguments')
    group_split.add_argument(
        '-f',
        action='store',
        dest='field',
        type=str,
        required=True,
        help='Annotation field by which to split database files.')
    group_split.add_argument(
        '--num',
        action='store',
        dest='num_split',
        type=float,
        default=None,
        help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.'''
    )
    parser_split.set_defaults(func=splitDbFile)

    return parser
Esempio n. 6
0
def getArgParser():
    """
    Defines the ArgumentParser

    Builds the top-level parser and one subparser per conversion operation
    (airr, changeo, fasta, baseline and genbank). The selected subcommand name
    is stored in the 'command' attribute and every subparser registers its
    handler function under the 'func' default.

    Arguments:
      None

    Returns:
      an ArgumentParser object
    """
    # Define input and output field help message
    # The 'sequences' entry names the two subcommands defined below that create
    # fasta files (fasta and baseline); this parser has no 'clip' subcommand.
    fields = dedent(
             '''
             output files:
                 airr
                     AIRR formatted database files.
                 changeo
                     Change-O formatted database files.
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and baseline.
                 genbank
                     feature tables and fasta files containing MiAIRR compliant input for tbl2asn.

             required fields:
                 sequence_id, sequence, sequence_alignment, junction, v_call, d_call, j_call, 
                 v_germline_start, v_germline_end, v_sequence_start, v_sequence_end, 
                 d_sequence_start, d_sequence_end, j_sequence_start, j_sequence_end 

             optional fields:
                 germline_alignment, c_call, clone_id 
             ''')

    # Define ArgumentParser
    # The automatic -h/--help is disabled so that it can be placed in its own
    # 'help' argument group together with --version.
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter, add_help=False)
    group_help = parser.add_argument_group('help')
    group_help.add_argument('--version', action='version',
                            version='%(prog)s:' + ' %s %s' %(__version__, __date__))
    group_help.add_argument('-h', '--help', action='help', help='show this help message and exit')
    subparsers = parser.add_subparsers(title='subcommands', dest='command', metavar='',
                                       help='Database operation')
    # TODO:  This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Define parent parsers
    # format_parent differs from default_parent only in not passing format=False;
    # it is used by the genbank subcommand.
    default_parent = getCommonArgParser(failed=False, log=False, format=False)
    format_parent = getCommonArgParser(failed=False, log=False)

    # Subparser to convert changeo to AIRR files
    parser_airr = subparsers.add_parser('airr', parents=[default_parent],
                                        formatter_class=CommonHelpFormatter, add_help=False,
                                        help='Converts input to an AIRR TSV file.',
                                        description='Converts input to an AIRR TSV file.')
    parser_airr.set_defaults(func=convertToAIRR)

    # Subparser to convert AIRR to changeo files
    parser_changeo = subparsers.add_parser('changeo', parents=[default_parent],
                                       formatter_class=CommonHelpFormatter, add_help=False,
                                       help='Converts input into a Change-O TSV file.',
                                       description='Converts input into a Change-O TSV file.')
    parser_changeo.set_defaults(func=convertToChangeo)

    # Subparser to insert IMGT-gaps
    # desc_gap = dedent('''
    #                   Inserts IMGT numbering spacers into the observed sequence
    #                   (SEQUENCE_IMGT, sequence_alignment) and rebuilds the germline sequence
    #                   (GERMLINE_IMGT, germline_alignment) if present. Also adjusts the values
    #                   in the V germline coordinate fields (V_GERM_START_IMGT, V_GERM_LENGTH_IMGT;
    #                   v_germline_end, v_germline_start), which are required.
    #                   ''')
    # parser_gap = subparsers.add_parser('gap', parents=[format_parent],
    #                                     formatter_class=CommonHelpFormatter, add_help=False,
    #                                     help='Inserts IMGT numbering spacers into the V region.',
    #                                     description=desc_gap)
    # group_gap = parser_gap.add_argument_group('conversion arguments')
    # group_gap.add_argument('-r', nargs='+', action='store', dest='references', required=False,
    #                         help='''List of folders and/or fasta files containing
    #                                 IMGT-gapped germline sequences corresponding to the
    #                                 set of germlines used for the alignment.''')
    # parser_gap.set_defaults(func=insertGaps)

    # Subparser to convert database entries to sequence file
    parser_fasta = subparsers.add_parser('fasta', parents=[default_parent],
                                       formatter_class=CommonHelpFormatter, add_help=False,
                                       help='Creates a fasta file from database records.',
                                       description='Creates a fasta file from database records.')
    group_fasta = parser_fasta.add_argument_group('conversion arguments')
    group_fasta.add_argument('--if', action='store', dest='id_field',
                              default=default_id_field,
                              help='The name of the field containing identifiers')
    group_fasta.add_argument('--sf', action='store', dest='seq_field',
                              default=default_seq_field,
                              help='The name of the field containing sequences')
    group_fasta.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                              help='List of annotation fields to add to the sequence description')
    parser_fasta.set_defaults(func=convertToFasta)

    # Subparser to convert database entries to a BASELINe fasta file
    parser_baseln = subparsers.add_parser('baseline', parents=[default_parent],
                                          formatter_class=CommonHelpFormatter, add_help=False,
                                          description='Creates a BASELINe fasta file from database records.',
                                          help='''Creates a specially formatted fasta file
                                               from database records for input into the BASELINe
                                               website. The format groups clonally related sequences
                                               sequentially, with the germline sequence preceding
                                               each clone and denoted by headers starting with ">>".''')
    group_baseln = parser_baseln.add_argument_group('conversion arguments')
    group_baseln.add_argument('--if', action='store', dest='id_field',
                               default=default_id_field,
                               help='The name of the field containing identifiers')
    group_baseln.add_argument('--sf', action='store', dest='seq_field',
                               default=default_seq_field,
                               help='The name of the field containing reads')
    group_baseln.add_argument('--gf', action='store', dest='germ_field',
                               default=default_germ_field,
                               help='The name of the field containing germline sequences')
    # Help text corrected: the word "containing" was duplicated.
    group_baseln.add_argument('--cf', action='store', dest='cluster_field', default=None,
                               help='The name of the field containing sorted clone IDs')
    group_baseln.add_argument('--mf', nargs='+', action='store', dest='meta_fields',
                               help='List of annotation fields to add to the sequence description')
    parser_baseln.set_defaults(func=convertToBaseline)

    # Subparser to convert database entries to a GenBank fasta and feature table file
    parser_gb = subparsers.add_parser('genbank', parents=[format_parent],
                                       formatter_class=CommonHelpFormatter, add_help=False,
                                       help='Creates files for GenBank/TLS submissions.',
                                       description='Creates files for GenBank/TLS submissions.')
    # Genbank source information arguments
    group_gb_src = parser_gb.add_argument_group('source information arguments')
    group_gb_src.add_argument('--mol', action='store', dest='molecule', default=default_molecule,
                              help='''The source molecule type. Usually one of "mRNA" or "genomic DNA".''')
    group_gb_src.add_argument('--product', action='store', dest='product', default=default_product,
                              help='''The product name, such as "immunoglobulin heavy chain".''')
    group_gb_src.add_argument('--db', action='store', dest='db_xref', default=None,
                              help='''Name of the reference database used for alignment. 
                                   Usually "IMGT/GENE-DB".''')
    group_gb_src.add_argument('--inf', action='store', dest='inference', default=None,
                              help='''Name and version of the inference tool used for reference alignment in the 
                                   form tool:version.''')
    # Genbank sample information arguments
    group_gb_sam = parser_gb.add_argument_group('sample information arguments')
    group_gb_sam.add_argument('--organism', action='store', dest='organism', default=None,
                              help='The scientific name of the organism.')
    group_gb_sam.add_argument('--sex', action='store', dest='sex', default=None,
                              help='''If specified, adds the given sex annotation 
                                   to the fasta headers.''')
    group_gb_sam.add_argument('--isolate', action='store', dest='isolate', default=None,
                              help='''If specified, adds the given isolate annotation 
                                   (sample label) to the fasta headers.''')
    group_gb_sam.add_argument('--tissue', action='store', dest='tissue', default=None,
                              help='''If specified, adds the given tissue-type annotation 
                                   to the fasta headers.''')
    group_gb_sam.add_argument('--cell-type', action='store', dest='cell_type', default=None,
                              help='''If specified, adds the given cell-type annotation 
                                   to the fasta headers.''')
    group_gb_sam.add_argument('-y', action='store', dest='yaml_config', default=None,
                              help='''A yaml file specifying sample features (BioSample attributes) 
                                   in the form \'variable: value\'. If specified, any features provided in the 
                                   yaml file will override those provided at the commandline. Note,
                                   this config file applies to sample features only and
                                   cannot be used for required source features such as 
                                   the --product or --mol argument.''')
    # General genbank conversion arguments
    group_gb_cvt = parser_gb.add_argument_group('conversion arguments')
    group_gb_cvt.add_argument('--label', action='store', dest='label', default=None,
                              help='''If specified, add a field name to the sequence identifier. 
                                   Sequence identifiers will be output in the form <label>=<id>.''')
    group_gb_cvt.add_argument('--cf', action='store', dest='c_field', default=None,
                              help='''Field containing the C region call. If unspecified, the C region gene 
                                   call will be excluded from the feature table.''')
    group_gb_cvt.add_argument('--nf', action='store', dest='count_field', default=None,
                              help='''If specified, use the provided column to add the AIRR_READ_COUNT 
                                   note to the feature table.''')
    group_gb_cvt.add_argument('--if', action='store', dest='index_field', default=None,
                              help='''If specified, use the provided column to add the AIRR_CELL_INDEX 
                                   note to the feature table.''')
    group_gb_cvt.add_argument('--allow-stop', action='store_true', dest='allow_stop',
                              help='''If specified, retain records in the output with stop codons in the junction region.
                                   In such records the CDS will be removed and replaced with a similar misc_feature in 
                                   the feature table.''')
    group_gb_cvt.add_argument('--asis-id', action='store_true', dest='asis_id',
                              help='''If specified, use the existing sequence identifier for the output identifier. 
                                   By default, only the row number will be used as the identifier to avoid
                                   the 50 character limit.''')
    group_gb_cvt.add_argument('--asis-calls', action='store_true', dest='asis_calls',
                              help='''Specify to prevent alleles from being parsed using the IMGT nomenclature.
                                   Note, this requires the gene assignments to be exact matches to valid 
                                   records in the references database specified by the --db argument.''')
    group_gb_cvt.add_argument('--allele-delim', action='store', dest='allele_delim', default=default_allele_delim,
                              help='''The delimiter to use for splitting the gene name from the allele number.
                                   Note, this only applies when specifying --asis-calls. By default,
                                   this argument will be ignored and allele numbers extracted under the
                                   expectation of IMGT nomenclature consistency.''')
    group_gb_cvt.add_argument('--asn', action='store_true', dest='build_asn',
                              help='''If specified, run tbl2asn to generate the .sqn submission file after making 
                                   the .fsa and .tbl files.''')
    group_gb_cvt.add_argument('--sbt', action='store', dest='asn_template', default=None,
                              help='''If provided along with --asn, use the specified file for the template file
                                   argument to tbl2asn.''')
    group_gb_cvt.add_argument('--exec', action='store', dest='tbl2asn_exec', default=default_tbl2asn_exec,
                              help='The name or location of the tbl2asn executable.')
    parser_gb.set_defaults(func=convertToGenbank)

    return parser
Esempio n. 7
0
def getArgParser():
    """
    Defines the ArgumentParser

    Builds the top-level parser plus one subparser per database operation
    (fasta, baseline, split, add, delete, drop, index, rename, select, sort
    and update). Every subparser inherits the shared options returned by
    getCommonArgParser and registers its handler under the `func` default,
    so the caller can dispatch on the parsed namespace.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Define input and output field help message (shown as the epilog of --help)
    fields = dedent('''
             output files:
                 sequences
                     FASTA formatted sequences output from the subcommands fasta and baseline.
                 <field>-<value>
                     database files partitioned by annotation <field> and <value>.
                 parse-<command>
                     output of the database modification functions where <command> is one of
                     the subcommands add, index, drop, delete, rename, select, sort or update.

             required fields:
                 SEQUENCE_ID
                 
             optional fields:
                 JUNCTION, SEQUENCE_IMGT, SEQUENCE_VDJ, GERMLINE_IMGT, GERMLINE_VDJ,
                 GERMLINE_IMGT_D_MASK, GERMLINE_VDJ_D_MASK,
                 GERMLINE_IMGT_V_REGION, GERMLINE_VDJ_V_REGION
                
             output fields:
                 None
             ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__,
                            epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version',
                        action='version',
                        version='%(prog)s:' + ' %s-%s' %
                        (__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands',
                                       dest='command',
                                       metavar='',
                                       help='Database operation')
    # TODO:  This is a temporary fix for Python issue 9253
    # Setting the attribute makes omitting the subcommand a parse error.
    subparsers.required = True

    # Define parent parser holding the options shared by every subcommand
    parser_parent = getCommonArgParser(seq_in=False,
                                       seq_out=False,
                                       db_in=True,
                                       failed=False,
                                       log=False)

    # Subparser to convert database entries to sequence file
    parser_seq = subparsers.add_parser(
        'fasta',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Creates a fasta file from database records.',
        description='Creates a fasta file from database records.')
    parser_seq.add_argument(
        '--if',
        action='store',
        dest='id_field',
        default=default_id_field,
        help='The name of the field containing identifiers')
    parser_seq.add_argument('--sf',
                            action='store',
                            dest='seq_field',
                            default=default_seq_field,
                            help='The name of the field containing sequences')
    parser_seq.add_argument(
        '--mf',
        nargs='+',
        action='store',
        dest='meta_fields',
        help='List of annotation fields to add to the sequence description')
    parser_seq.set_defaults(func=convertDbFasta)

    # Subparser to convert database entries to a BASELINe fasta file
    parser_baseln = subparsers.add_parser(
        'baseline',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        description='Creates a BASELINe fasta file from database records.',
        help='''Creates a specially formatted fasta file
                                               from database records for input into the BASELINe
                                               website. The format groups clonally related sequences
                                               sequentially, with the germline sequence preceding
                                               each clone and denoted by headers starting with ">>".'''
    )
    parser_baseln.add_argument(
        '--if',
        action='store',
        dest='id_field',
        default=default_id_field,
        help='The name of the field containing identifiers')
    parser_baseln.add_argument('--sf',
                               action='store',
                               dest='seq_field',
                               default=default_seq_field,
                               help='The name of the field containing reads')
    parser_baseln.add_argument(
        '--gf',
        action='store',
        dest='germ_field',
        default=default_germ_field,
        help='The name of the field containing germline sequences')
    parser_baseln.add_argument(
        '--cf',
        action='store',
        dest='cluster_field',
        default=None,
        help='The name of the field containing sorted clone IDs')
    parser_baseln.add_argument(
        '--mf',
        nargs='+',
        action='store',
        dest='meta_fields',
        help='List of annotation fields to add to the sequence description')
    parser_baseln.set_defaults(func=convertDbBaseline)

    # Subparser to partition files by annotation values
    parser_split = subparsers.add_parser(
        'split',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Splits database files by field values.',
        description='Splits database files by field values')
    parser_split.add_argument(
        '-f',
        action='store',
        dest='field',
        type=str,
        required=True,
        help='Annotation field by which to split database files.')
    parser_split.add_argument(
        '--num',
        action='store',
        dest='num_split',
        type=float,
        default=None,
        help='''Specify to define the field as numeric and group
                                   records by whether they are less than or at least
                                   (greater than or equal to) the specified value.'''
    )
    parser_split.set_defaults(func=splitDbFile)

    # Subparser to add records
    parser_add = subparsers.add_parser(
        'add',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Adds field and value pairs.',
        description='Adds field and value pairs.')
    parser_add.add_argument('-f',
                            nargs='+',
                            action='store',
                            dest='fields',
                            required=True,
                            help='The name of the fields to add.')
    parser_add.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        required=True,
        help='The value to assign to all rows for each field.')
    parser_add.set_defaults(func=addDbFile)

    # Subparser to delete records
    parser_delete = subparsers.add_parser(
        'delete',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Deletes specific records.',
        description='Deletes specific records.')
    parser_delete.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to check for deletion criteria.')
    # -u is optional here: when omitted, the values default to '' and 'NA'
    parser_delete.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        default=['', 'NA'],
        help='''The values defining which records to delete. A value
                                    may appear in any of the fields specified with -f.'''
    )
    parser_delete.add_argument(
        '--logic',
        action='store',
        dest='logic',
        choices=('any', 'all'),
        default='any',
        help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).'''
    )
    parser_delete.add_argument(
        '--regex',
        action='store_true',
        dest='regex',
        help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_delete.set_defaults(func=deleteDbFile)

    # Subparser to drop fields
    parser_drop = subparsers.add_parser('drop',
                                        parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Deletes entire fields.',
                                        description='Deletes entire fields.')
    parser_drop.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to delete from the database.')
    parser_drop.set_defaults(func=dropDbFile)

    # Subparser to index fields
    parser_index = subparsers.add_parser(
        'index',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Adds a numeric index field.',
        description='Adds a numeric index field.')
    parser_index.add_argument(
        '-f',
        action='store',
        dest='field',
        default=default_index_field,
        help='The name of the index field to add to the database.')
    parser_index.set_defaults(func=indexDbFile)

    # Subparser to rename fields
    parser_rename = subparsers.add_parser('rename',
                                          parents=[parser_parent],
                                          formatter_class=CommonHelpFormatter,
                                          help='Renames fields.',
                                          description='Renames fields.')
    parser_rename.add_argument('-f',
                               nargs='+',
                               action='store',
                               dest='fields',
                               required=True,
                               help='List of fields to rename.')
    parser_rename.add_argument('-k',
                               nargs='+',
                               action='store',
                               dest='names',
                               required=True,
                               help='List of new names for each field.')
    parser_rename.set_defaults(func=renameDbFile)

    # Subparser to select records (same -f/-u/--logic/--regex options as
    # delete, except that -u is mandatory)
    parser_select = subparsers.add_parser(
        'select',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Selects specific records.',
        description='Selects specific records.')
    parser_select.add_argument(
        '-f',
        nargs='+',
        action='store',
        dest='fields',
        required=True,
        help='The name of the fields to check for selection criteria.')
    parser_select.add_argument(
        '-u',
        nargs='+',
        action='store',
        dest='values',
        required=True,
        help='''The values defining which records to select. A value
                                    may appear in any of the fields specified with -f.'''
    )
    parser_select.add_argument(
        '--logic',
        action='store',
        dest='logic',
        choices=('any', 'all'),
        default='any',
        help='''Defines whether a value may appear in any field (any)
                                    or whether it must appear in all fields (all).'''
    )
    parser_select.add_argument(
        '--regex',
        action='store_true',
        dest='regex',
        help='''If specified, treat values as regular expressions
                                    and allow partial string matches.''')
    parser_select.set_defaults(func=selectDbFile)

    # Subparser to sort file by records
    parser_sort = subparsers.add_parser(
        'sort',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Sorts records by field values.',
        description='Sorts records by field values.')
    parser_sort.add_argument(
        '-f',
        action='store',
        dest='field',
        type=str,
        required=True,
        help='The annotation field by which to sort records.')
    parser_sort.add_argument(
        '--num',
        action='store_true',
        dest='numeric',
        default=False,
        help='''Specify to define the sort column as numeric rather
                                  than textual.''')
    parser_sort.add_argument(
        '--descend',
        action='store_true',
        dest='descend',
        help='''If specified, sort records in descending, rather
                             than ascending, order by values in the target field.'''
    )
    parser_sort.set_defaults(func=sortDbFile)

    # Subparser to update records
    parser_update = subparsers.add_parser(
        'update',
        parents=[parser_parent],
        formatter_class=CommonHelpFormatter,
        help='Updates field and value pairs.',
        description='Updates field and value pairs.')
    parser_update.add_argument('-f',
                               action='store',
                               dest='field',
                               required=True,
                               help='The name of the field to update.')
    parser_update.add_argument('-u',
                               nargs='+',
                               action='store',
                               dest='values',
                               required=True,
                               help='The values that will be replaced.')
    parser_update.add_argument(
        '-t',
        nargs='+',
        action='store',
        dest='updates',
        required=True,
        help='''The new value to assign to each selected row.''')
    parser_update.set_defaults(func=updateDbFile)

    return parser
Esempio n. 8
0
def getArgParser():
    """
    Defines the ArgumentParser

    Builds the top-level parser and one subparser per supported aligner
    (igblast and imgt). Each subparser inherits the shared options returned
    by getCommonArgParser and registers its handler under the `func` default.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Output file and field description shown as the epilog of --help
    fields = dedent(
             '''
              output files:
                  db-pass
                      database of parsed alignment records.
                  db-fail
                      database with records failing alignment.

              output fields:
                  SEQUENCE_ID, SEQUENCE_INPUT, FUNCTIONAL, IN_FRAME, STOP, MUTATED_INVARIANT,
                  INDELS, V_CALL, D_CALL, J_CALL, SEQUENCE_VDJ and/or SEQUENCE_IMGT,
                  V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_VDJ and/or V_GERM_START_IMGT,
                  V_GERM_LENGTH_VDJ and/or V_GERM_LENGTH_IMGT, N1_LENGTH,
                  D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH, N2_LENGTH,
                  J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH,
                  JUNCTION_LENGTH, JUNCTION, V_SCORE, V_IDENTITY, V_EVALUE, V_BTOP,
                  J_SCORE, J_IDENTITY, J_EVALUE, J_BTOP, FWR1_IMGT, FWR2_IMGT, FWR3_IMGT,
                  FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, CDR3_IMGT
              ''')

    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' %(__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command',
                                       help='Aligner used', metavar='')
    # TODO:  This is a temporary fix for Python issue 9253
    # Setting the attribute makes omitting the subcommand a parse error.
    subparsers.required = True

    # Parent parser holding the options shared by both aligner subcommands
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, log=False)

    # IgBlast Aligner
    parser_igblast = subparsers.add_parser('igblast', help='Process IgBlast output',
                                           parents=[parser_parent],
                                           formatter_class=CommonHelpFormatter)
    parser_igblast.set_defaults(func=parseIgBlast)
    parser_igblast.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                                required=True,
                                help='''IgBLAST output files in format 7 with query sequence
                                     (IgBLAST argument \'-outfmt "7 std qseq sseq btop"\').''')
    parser_igblast.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                                help='''List of folders and/or fasta files containing
                                     IMGT-gapped germline sequences corresponding to the
                                     set of germlines used in the IgBLAST alignment.''')
    parser_igblast.add_argument('-s', action='store', nargs='+', dest='seq_files',
                                required=True,
                                help='List of input FASTA files containing sequences')
    parser_igblast.add_argument('--noparse', action='store_true', dest='no_parse',
                                help='''Specify if input IDs should not be parsed to add
                                     new columns to database.''')
    parser_igblast.add_argument('--scores', action='store_true', dest='score_fields',
                                help='''Specify if alignment score metrics should be
                                     included in the output. Adds the V_SCORE, V_IDENTITY,
                                     V_EVALUE, V_BTOP, J_SCORE, J_IDENTITY,
                                     J_BTOP, and J_EVALUE columns.''')
    parser_igblast.add_argument('--regions', action='store_true', dest='region_fields',
                                help='''Specify if IMGT framework and CDR regions should be
                                     included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
                                     FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
                                     CDR3_IMGT columns.''')

    # IMGT aligner
    parser_imgt = subparsers.add_parser('imgt', help='Process IMGT/HighV-Quest output',
                                        parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter)
    # -i is mandatory. It is declared with required=True (as for igblast)
    # instead of a one-member mutually exclusive group, so a missing -i is
    # reported as a plain missing required argument.
    parser_imgt.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                             required=True,
                             help='''Either zipped IMGT output files (.zip) or a folder
                                     containing unzipped IMGT output files (which must
                                     include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences,
                                     and 6_Junction).''')
    # Unlike igblast, the FASTA inputs are optional for IMGT (nargs='*')
    parser_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files',
                             required=False,
                             help='List of input FASTA files containing sequences')
    parser_imgt.add_argument('--noparse', action='store_true', dest='no_parse',
                             help='''Specify if input IDs should not be parsed to add new
                                  columns to database.''')
    parser_imgt.add_argument('--scores', action='store_true', dest='score_fields',
                             help='''Specify if alignment score metrics should be
                                  included in the output. Adds the V_SCORE, V_IDENTITY,
                                  J_SCORE and J_IDENTITY. Note, this will also add
                                  the columns V_EVALUE, V_BTOP, J_EVALUE and J_BTOP,
                                  but they will be empty for IMGT output.''')
    parser_imgt.add_argument('--regions', action='store_true', dest='region_fields',
                             help='''Specify if IMGT framework and CDR regions should be
                                  included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
                                  FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
                                  CDR3_IMGT columns.''')
    parser_imgt.set_defaults(func=parseIMGT)

    return parser
Esempio n. 9
0
def getArgParser():
    """
    Assembles the command line parser for germline assignment

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Epilog describing the files and fields that are read and written
    epilog_text = dedent('''
             output files:
                 germ-pass
                    database with assigned germline sequences.
                 germ-fail
                    database with records failing germline assignment.

             required fields:
                 SEQUENCE_ID, SEQUENCE_VDJ or SEQUENCE_IMGT,
                 V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL,
                 V_SEQ_START, V_SEQ_LENGTH, V_GERM_START_IMGT, V_GERM_LENGTH_IMGT,
                 D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH,
                 J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH,
                 NP1_LENGTH, NP2_LENGTH

             optional fields:
                 N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH,
                 CLONE


             output fields:
                 GERMLINE_VDJ, GERMLINE_VDJ_D_MASK, GERMLINE_VDJ_V_REGION,
                 GERMLINE_IMGT, GERMLINE_IMGT_D_MASK, GERMLINE_IMGT_V_REGION,
                 GERMLINE_V_CALL, GERMLINE_D_CALL, GERMLINE_J_CALL,
                 GERMLINE_REGIONS
              ''')

    # The shared options are inherited from the common parent parser
    common = getCommonArgParser(seq_in=False, seq_out=False, db_in=True,
                                annotation=False)
    parser = ArgumentParser(description=__doc__, epilog=epilog_text,
                            parents=[common],
                            formatter_class=CommonHelpFormatter)

    version_text = '%(prog)s:' + ' %s-%s' % (__version__, __date__)
    parser.add_argument('--version', action='version', version=version_text)

    # Tool-specific options as (flag, keyword arguments) pairs, registered
    # below in the listed order
    option_table = (
        ('-r', dict(nargs='+', action='store', dest='repo', required=True,
                    help='''List of folders and/or fasta files (with .fasta, .fna or .fa
                         extension) with germline sequences.''')),
        ('-g', dict(action='store', dest='germ_types',
                    default=default_germ_types, nargs='+',
                    choices=('full', 'dmask', 'vonly', 'regions'),
                    help='''Specify type(s) of germlines to include full germline,
                             germline with D-region masked, or germline for V region only.''')),
        ('--cloned', dict(action='store_true', dest='cloned',
                          help='''Specify to create only one germline per clone. Assumes input file is
                             sorted by clone column, and will not yield correct results if the data
                             is unsorted. Note, if allele calls are ambiguous within a clonal group,
                             this will place the germline call used for the entire clone within the
                             GERMLINE_V_CALL, GERMLINE_D_CALL and GERMLINE_J_CALL fields.''')),
        ('--vf', dict(action='store', dest='v_field', default=default_v_field,
                      help='Specify field to use for germline V call')),
        ('--sf', dict(action='store', dest='seq_field',
                      default=default_seq_field,
                      help='Specify field to use for sequence')),
    )
    for flag, settings in option_table:
        parser.add_argument(flag, **settings)

    return parser
Esempio n. 10
0
def getArgParser():
    """
    Defines the ArgumentParser.

    Builds the top-level parser and one subparser per supported aligner
    (igblast, imgt and ihmm). Each subparser inherits the shared options
    returned by getCommonArgParser and registers its handler under the
    `func` default.

    Returns: 
      argparse.ArgumentParser
    """
    # Output file and field description shown as the epilog of --help
    fields = dedent(
             '''
              output files:
                  db-pass
                      database of alignment records with functionality information,
                      V and J calls, and a junction region.
                  db-fail
                      database with records that fail due to no functionality information
                      (did not pass IMGT), no V call, no J call, or no junction region.

              universal output fields:
                  SEQUENCE_ID, SEQUENCE_INPUT, SEQUENCE_VDJ, SEQUENCE_IMGT,
                  FUNCTIONAL, IN_FRAME, STOP, MUTATED_INVARIANT, INDELS,
                  V_CALL, D_CALL, J_CALL,
                  V_SEQ_START, V_SEQ_LENGTH,
                  D_SEQ_START, D_SEQ_LENGTH, D_GERM_START, D_GERM_LENGTH,
                  J_SEQ_START, J_SEQ_LENGTH, J_GERM_START, J_GERM_LENGTH,
                  JUNCTION_LENGTH, JUNCTION, NP1_LENGTH, NP2_LENGTH,
                  FWR1_IMGT, FWR2_IMGT, FWR3_IMGT, FWR4_IMGT,
                  CDR1_IMGT, CDR2_IMGT, CDR3_IMGT

              imgt specific output fields:
                  V_GERM_START_IMGT, V_GERM_LENGTH_IMGT,
                  N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH, P5J_LENGTH,
                  D_FRAME, V_SCORE, V_IDENTITY, J_SCORE, J_IDENTITY

              igblast specific output fields:
                  V_GERM_START_VDJ, V_GERM_LENGTH_VDJ,
                  V_EVALUE, V_SCORE, V_IDENTITY, V_BTOP,
                  J_EVALUE, J_SCORE, J_IDENTITY, J_BTOP,
                  CDR3_IGBLAST_NT, CDR3_IGBLAST_AA

              ihmm specific output fields:
                  V_GERM_START_VDJ, V_GERM_LENGTH_VDJ,
                  HMM_SCORE
              ''')
                
    # Define ArgumentParser
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter)
    parser.add_argument('--version', action='version',
                        version='%(prog)s:' + ' %s-%s' %(__version__, __date__))
    subparsers = parser.add_subparsers(title='subcommands', dest='command',
                                       help='Aligner used', metavar='')
    # TODO:  This is a temporary fix for Python issue 9253
    # Setting the attribute makes omitting the subcommand a parse error.
    subparsers.required = True

    # Parent parser holding the options shared by all aligner subcommands
    parser_parent = getCommonArgParser(seq_in=False, seq_out=False, log=False)

    # IgBlast Aligner
    parser_igblast = subparsers.add_parser('igblast', parents=[parser_parent],
                                           formatter_class=CommonHelpFormatter,
                                           help='Process IgBLAST output.',
                                           description='Process IgBLAST output.')
    parser_igblast.add_argument('-i', nargs='+', action='store', dest='aligner_outputs',
                                required=True,
                                help='''IgBLAST output files in format 7 with query sequence
                                     (IgBLAST argument \'-outfmt "7 std qseq sseq btop"\').''')
    parser_igblast.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                                help='''List of folders and/or fasta files containing
                                     IMGT-gapped germline sequences corresponding to the
                                     set of germlines used in the IgBLAST alignment.''')
    parser_igblast.add_argument('-s', action='store', nargs='+', dest='seq_files',
                                required=True,
                                help='''List of input FASTA files (with .fasta, .fna or .fa
                                     extension), containing sequences.''')
    parser_igblast.add_argument('--noparse', action='store_true', dest='no_parse',
                                help='''Specify to prevent input sequence headers from being parsed
                                    to add new columns to database. Parsing of sequence headers requires
                                    headers to be in the pRESTO annotation format, so this should be specified
                                    when sequence headers are incompatible with the pRESTO annotation scheme.
                                    Note, unrecognized header formats will default to this behavior.''')
    parser_igblast.add_argument('--partial', action='store_true', dest='partial',
                                help='''If specified, include incomplete V(D)J alignments in
                                     the pass file instead of the fail file.''')
    parser_igblast.add_argument('--scores', action='store_true', dest='parse_scores',
                                help='''Specify if alignment score metrics should be
                                     included in the output. Adds the V_SCORE, V_IDENTITY,
                                     V_EVALUE, V_BTOP, J_SCORE, J_IDENTITY,
                                     J_BTOP, and J_EVALUE columns.''')
    parser_igblast.add_argument('--regions', action='store_true', dest='parse_regions',
                                help='''Specify if IMGT FWRs and CDRs should be
                                     included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
                                     FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
                                     CDR3_IMGT columns.''')
    parser_igblast.add_argument('--cdr3', action='store_true',
                                dest='parse_igblast_cdr3', 
                                help='''Specify if the CDR3 sequences generated by IgBLAST 
                                     should be included in the output. Adds the columns
                                     CDR3_IGBLAST_NT and CDR3_IGBLAST_AA. Requires IgBLAST
                                     version 1.5 or greater.''')
    parser_igblast.set_defaults(func=parseIgBLAST)

    # IMGT aligner
    parser_imgt = subparsers.add_parser('imgt', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='''Process IMGT/HighV-Quest output
                                             (does not work with V-QUEST).''',
                                        description='''Process IMGT/HighV-Quest output
                                             (does not work with V-QUEST).''')
    # The aligner output is mandatory here too, matching the igblast and ihmm
    # subcommands, so a missing -i is rejected at parse time.
    parser_imgt.add_argument('-i', nargs='+', action='store', dest='aligner_outputs',
                             required=True,
                             help='''Either zipped IMGT output files (.zip or .txz) or a
                                  folder containing unzipped IMGT output files (which must
                                  include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences,
                                  and 6_Junction).''')
    # Unlike igblast and ihmm, the FASTA inputs are optional for IMGT (nargs='*')
    parser_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files',
                             required=False,
                             help='''List of input FASTA files (with .fasta, .fna or .fa
                                  extension) containing sequences.''')
    parser_imgt.add_argument('--noparse', action='store_true', dest='no_parse', 
                             help='''Specify to prevent input sequence headers from being parsed
                                  to add new columns to database. Parsing of sequence headers requires
                                  headers to be in the pRESTO annotation format, so this should be specified
                                  when sequence headers are incompatible with the pRESTO annotation scheme.
                                  Note, unrecognized header formats will default to this behavior.''')
    parser_imgt.add_argument('--partial', action='store_true', dest='partial',
                             help='''If specified, include incomplete V(D)J alignments in
                                  the pass file instead of the fail file.''')
    parser_imgt.add_argument('--scores', action='store_true', dest='parse_scores',
                             help='''Specify if alignment score metrics should be
                                  included in the output. Adds the V_SCORE, V_IDENTITY,
                                  J_SCORE and J_IDENTITY.''')
    parser_imgt.add_argument('--regions', action='store_true', dest='parse_regions',
                             help='''Specify if IMGT FWRs and CDRs should be
                                  included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
                                  FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
                                  CDR3_IMGT columns.''')
    parser_imgt.add_argument('--junction', action='store_true', dest='parse_junction',
                             help='''Specify if detailed junction fields should be
                                  included in the output. Adds the columns 
                                  N1_LENGTH, N2_LENGTH, P3V_LENGTH, P5D_LENGTH, P3D_LENGTH,
                                  P5J_LENGTH, D_FRAME.''')
    parser_imgt.set_defaults(func=parseIMGT)

    # iHMMuneAlign Aligner
    parser_ihmm = subparsers.add_parser('ihmm', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter,
                                        help='Process iHMMune-Align output.',
                                        description='Process iHMMune-Align output.')
    parser_ihmm.add_argument('-i', nargs='+', action='store', dest='aligner_outputs',
                             required=True,
                             help='''iHMMune-Align output file.''')
    parser_ihmm.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                             help='''List of folders and/or FASTA files containing
                                  IMGT-gapped germline sequences corresponding to the
                                  set of germlines used in the iHMMune-Align alignment.''')
    parser_ihmm.add_argument('-s', action='store', nargs='+', dest='seq_files',
                             required=True,
                             help='''List of input FASTA files (with .fasta, .fna or .fa
                                  extension) containing sequences.''')
    parser_ihmm.add_argument('--noparse', action='store_true', dest='no_parse',
                             help='''Specify to prevent input sequence headers from being parsed
                                  to add new columns to database. Parsing of sequence headers requires
                                  headers to be in the pRESTO annotation format, so this should be specified
                                  when sequence headers are incompatible with the pRESTO annotation scheme.
                                  Note, unrecognized header formats will default to this behavior.''')
    parser_ihmm.add_argument('--partial', action='store_true', dest='partial',
                             help='''If specified, include incomplete V(D)J alignments in
                                  the pass file instead of the fail file.''')
    parser_ihmm.add_argument('--scores', action='store_true', dest='parse_scores',
                             help='''Specify if alignment score metrics should be
                                  included in the output. Adds the path score of the
                                  iHMMune-Align hidden Markov model to HMM_SCORE.''')
    parser_ihmm.add_argument('--regions', action='store_true', dest='parse_regions',
                             help='''Specify if IMGT FWRs and CDRs should be
                                  included in the output. Adds the FWR1_IMGT, FWR2_IMGT,
                                  FWR3_IMGT, FWR4_IMGT, CDR1_IMGT, CDR2_IMGT, and
                                  CDR3_IMGT columns.''')
    parser_ihmm.set_defaults(func=parseIHMM)

    return parser
def getArgParser():
    """
    Builds the command line parser for germline construction

    The parser inherits the shared options of getCommonArgParser(format=True)
    and adds a 'germline construction arguments' group holding the reference
    (-r), germline type (-g), per-clone (--cloned) and field name
    (--sf, --vf, --df, --jf, --cf) options.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Epilog listing the output files and the database fields that are used
    epilog_text = dedent('''
             output files:
                 germ-pass
                    database with assigned germline sequences.
                 germ-fail
                    database with records failing germline assignment.

             required fields:
                 sequence_id, sequence_alignment, v_call, d_call, j_call, 
                 v_sequence_start, v_sequence_end, v_germline_start, v_germline_end,
                 d_sequence_start, d_sequence_end, d_germline_start, d_germline_end,
                 j_sequence_start, j_sequence_end, j_germline_start, j_germline_end,
                 np1_length, np2_length

             optional fields:
                 n1_length, n2_length, p3v_length, p5d_length, p3d_length, p5j_length,
                 clone_id

             output fields:
                 germline_v_call, germline_d_call, germline_j_call,
                 germline_alignment, germline_alignment_d_mask, 
                 germline_alignment_v_region, germline_regions, 
              ''')

    # Top-level parser; the automatic -h/--help is switched off here
    # (presumably the common parent parser supplies it -- verify there)
    common_parent = getCommonArgParser(format=True)
    arg_parser = ArgumentParser(add_help=False,
                                formatter_class=CommonHelpFormatter,
                                parents=[common_parent],
                                description=__doc__,
                                epilog=epilog_text)

    # Germline reference, germline type and clone handling options
    germ_group = arg_parser.add_argument_group('germline construction arguments')
    germ_group.add_argument('-r', dest='references', action='store', nargs='+',
                            required=True,
                        help='''List of folders and/or fasta files (with .fasta, .fna or .fa
                         extension) with germline sequences. When using the default
                         Change-O sequence and coordinate fields, these reference sequences 
                         must contain IMGT-numbering spacers (gaps) in the V segment. 
                         Alternative numbering schemes, or no numbering, may work for alternative 
                         sequence and coordinate definitions that define a valid alignment, but 
                         a warning will be issued.''')
    germ_group.add_argument('-g', dest='germ_types', action='store', nargs='+',
                            choices=('full', 'dmask', 'vonly', 'regions'),
                            default=default_germ_types,
                        help='''Specify type(s) of germlines to include full germline,
                             germline with D segment masked, or germline for V segment only.''')
    germ_group.add_argument('--cloned', dest='cloned', action='store_true',
                        help='''Specify to create only one germline per clone. Note, if allele 
                             calls are ambiguous within a clonal group, this will place the 
                             germline call used for the entire clone within the
                             germline_v_call, germline_d_call and germline_j_call fields.''')

    # Field name overrides; each one stores a string and defaults to None.
    # Entries are (option flag, destination attribute, help text) and are
    # registered in the order listed so the help output order is unchanged.
    field_options = (
        ('--sf', 'seq_field',
                        '''Field containing the aligned sequence.
                             Defaults to sequence_alignment (airr) or SEQUENCE_IMGT (changeo).'''),
        ('--vf', 'v_field',
                        '''Field containing the germline V segment call.
                             Defaults to v_call (airr) or V_CALL (changeo).'''),
        ('--df', 'd_field',
                        '''Field containing the germline D segment call.
                             Defaults to d_call (airr) or D_CALL (changeo).'''),
        ('--jf', 'j_field',
                        '''Field containing the germline J segment call.
                             Defaults to j_call (airr) or J_CALL (changeo).'''),
        ('--cf', 'clone_field',
                        '''Field containing clone identifiers. 
                             Ignored if --cloned is not also specified.
                             Defaults to clone_id (airr) or CLONE (changeo).'''),
    )
    for flag, dest_name, help_text in field_options:
        germ_group.add_argument(flag, action='store', dest=dest_name,
                                default=None, help=help_text)

    return arg_parser
Esempio n. 12
0
def getArgParser():
    """
    Builds the command line parser for clonal assignment

    The parser requires one of three subcommands: 'bygroup' (distance based
    cloning with its own tuning options), 'chen2010' and 'ademokun2011'
    (both wired to the hierarchical clustering callbacks). Each subcommand
    stores the callbacks to run as parser defaults.

    Arguments:
    None

    Returns:
    an ArgumentParser object
    """
    # Epilog listing the output files and the database fields that are used
    epilog_text = dedent('''
             output files:
                 clone-pass
                     database with assigned clonal group numbers.
                 clone-fail
                     database with records failing clonal grouping.

             required fields:
                 SEQUENCE_ID, V_CALL or V_CALL_GENOTYPED, D_CALL, J_CALL, JUNCTION

                 <field>
                     sequence field specified by the --sf parameter
                
             output fields:
                 CLONE
              ''')

    # Top-level parser with a version flag
    version_text = '%(prog)s:' + ' %s-%s' % (__version__, __date__)
    arg_parser = ArgumentParser(description=__doc__, epilog=epilog_text,
                                formatter_class=CommonHelpFormatter)
    arg_parser.add_argument('--version', action='version', version=version_text)

    # Subcommand container
    subparsers = arg_parser.add_subparsers(title='subcommands', dest='command',
                                           metavar='', help='Cloning method')
    # TODO:  This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Options shared by every subcommand
    shared_parent = getCommonArgParser(seq_in=False, seq_out=False,
                                       db_in=True, multiproc=True)

    # Distance cloning method
    bygroup = subparsers.add_parser('bygroup', parents=[shared_parent],
                                    formatter_class=CommonHelpFormatter,
        help='''Defines clones as having same V assignment,
                                                J assignment, and junction length with
                                                specified substitution distance model.''',
        description='''Defines clones as having same V assignment,
                                                       J assignment, and junction length with
                                                       specified substitution distance model.''')
    bygroup.add_argument('-f', nargs='+', action='store', dest='fields', default=None,
                         help='Additional fields to use for grouping clones (non VDJ)')
    bygroup.add_argument('--mode', action='store', dest='mode',
                         choices=('allele', 'gene'), default=default_index_mode,
        help='''Specifies whether to use the V(D)J allele or gene for
                                  initial grouping.''')
    bygroup.add_argument('--act', action='store', dest='action',
                         choices=('first', 'set'), default=default_index_action,
        help='''Specifies how to handle multiple V(D)J assignments
                                  for initial grouping.''')
    bygroup.add_argument('--model', action='store', dest='model',
                         choices=choices_bygroup_model, default=default_bygroup_model,
        help='''Specifies which substitution model to use for calculating distance
                                  between sequences. The "ham" model is nucleotide Hamming distance and
                                  "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are
                                  human specific single nucleotide and 5-mer content models, respectively,
                                  from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are
                                  mouse specific single nucleotide and 5-mer content models, respectively,
                                  from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are
                                  deprecated models provided backwards compatibility with the "m1n" and
                                  "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both
                                  5-mer models should be considered experimental.''')
    bygroup.add_argument('--dist', action='store', dest='distance', type=float,
                         default=default_distance,
                         help='The distance threshold for clonal grouping')
    bygroup.add_argument('--norm', action='store', dest='norm',
                         choices=('len', 'mut', 'none'), default=default_norm,
        help='''Specifies how to normalize distances. One of none
                                  (do not normalize), len (normalize by length),
                                  or mut (normalize by number of mutations between sequences).''')
    bygroup.add_argument('--sym', action='store', dest='sym',
                         choices=('avg', 'min'), default=default_sym,
        help='''Specifies how to combine asymmetric distances. One of avg
                                  (average of A->B and B->A) or min (minimum of A->B and B->A).''')
    bygroup.add_argument('--link', action='store', dest='linkage',
                         choices=('single', 'average', 'complete'),
                         default=default_linkage,
                         help='''Type of linkage to use for hierarchical clustering.''')
    bygroup.add_argument('--maxmiss', action='store', dest='max_missing', type=int,
                         default=default_max_missing,
        help='''The maximum number of non-ACGT characters (gaps or Ns) to 
                                     permit in the junction sequence before excluding the record 
                                     from clonal assignment. Warning, under single linkage 
                                     non-informative positions can create artifactual links 
                                     between unrelated sequences. Use with caution.''')
    bygroup.add_argument('--sf', action='store', dest='seq_field',
                         default=default_seq_field,
        help='''The name of the field to be used to calculate
                                     distance between records''')
    # Callbacks stored as defaults for the bygroup subcommand
    bygroup.set_defaults(feed_func=feedQueue,
                         work_func=processQueue,
                         collect_func=collectQueue,
                         group_func=indexJunctions,
                         clone_func=distanceClones)

    # Chen2010 and Ademokun2011 take no extra options and store the same
    # clustering callbacks; only the name and the description differ
    clustering_methods = (
        ('chen2010', '''Defines clones by method specified in Chen, 2010.'''),
        ('ademokun2011', '''Defines clones by method specified in Ademokun, 2011.'''),
    )
    for method_name, summary in clustering_methods:
        clust_parser = subparsers.add_parser(method_name, parents=[shared_parent],
                                             formatter_class=CommonHelpFormatter,
                                             help=summary, description=summary)
        clust_parser.set_defaults(feed_func=feedQueueClust,
                                  work_func=processQueueClust,
                                  collect_func=collectQueueClust,
                                  cluster_func=hierClust)

    return arg_parser
Esempio n. 13
0
def getArgParser():
    """
    Defines the ArgumentParser.

    The parser requires one of four subcommands, one per supported aligner
    output: 'igblast' (igblastn), 'igblast-aa' (igblastp), 'imgt'
    (IMGT/HighV-QUEST) and 'ihmm' (iHMMune-Align). Every subcommand inherits
    the options of getCommonArgParser(db_in=False), adds its own
    'aligner parsing arguments' group, and stores the function that handles
    it as the 'func' default.

    Returns: 
      argparse.ArgumentParser
    """
    # Epilog listing the output files and the fields written for each aligner
    fields = dedent(
             '''
              output files:
                  db-pass
                      database of alignment records with functionality information,
                      V and J calls, and a junction region.
                  db-fail
                      database with records that fail due to no productivity information,
                      no gene V assignment, no J assignment, or no junction region.
                 
              universal output fields:
                 sequence_id, sequence, sequence_alignment, germline_alignment, 
                 rev_comp, productive, stop_codon, vj_in_frame, locus, 
                 v_call, d_call, j_call, junction, junction_length, junction_aa, 
                 v_sequence_start, v_sequence_end, v_germline_start, v_germline_end,
                 d_sequence_start, d_sequence_end, d_germline_start, d_germline_end,
                 j_sequence_start, j_sequence_end, j_germline_start, j_germline_end,
                 np1_length, np2_length, fwr1, fwr2, fwr3, fwr4, cdr1, cdr2, cdr3

              imgt specific output fields:
                  n1_length, n2_length, p3v_length, p5d_length, p3d_length, p5j_length, 
                  d_frame, v_score, v_identity, d_score, d_identity, j_score, j_identity 
                               
              igblast specific output fields:
                  v_score, v_identity, v_support, v_cigar, 
                  d_score, d_identity, d_support, d_cigar, 
                  j_score, j_identity, j_support, j_cigar

              ihmm specific output fields:
                  vdj_score
                  
              10X specific output fields:
                  cell_id, c_call, consensus_count, umi_count, 
                  v_call_10x, d_call_10x, j_call_10x,
                  junction_10x, junction_10x_aa
              ''')

    # Define ArgumentParser
    # The automatic help option is disabled so that --version and -h/--help
    # can be placed together in an explicit 'help' group.
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            formatter_class=CommonHelpFormatter, add_help=False)
    group_help = parser.add_argument_group('help')
    group_help.add_argument('--version', action='version',
                            version='%(prog)s:' + ' %s %s' %(__version__, __date__))
    group_help.add_argument('-h', '--help', action='help', help='show this help message and exit')
    subparsers = parser.add_subparsers(title='subcommands', dest='command',
                                       help='Aligner used', metavar='')
    # TODO:  This is a temporary fix for Python issue 9253
    subparsers.required = True

    # Parent parser
    parser_parent = getCommonArgParser(db_in=False)

    # igblastn output parser
    parser_igblast = subparsers.add_parser('igblast', parents=[parser_parent],
                                           formatter_class=CommonHelpFormatter, add_help=False,
                                           help='Process igblastn output.',
                                           description='Process igblastn output.')
    group_igblast = parser_igblast.add_argument_group('aligner parsing arguments')
    group_igblast.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                                required=True,
                                help='''IgBLAST output files in format 7 with query sequence
                                     (igblastn argument \'-outfmt "7 std qseq sseq btop"\').''')
    group_igblast.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                                help='''List of folders and/or fasta files containing
                                     the same germline set used in the IgBLAST alignment. These
                                     reference sequences must contain IMGT-numbering spacers (gaps)
                                     in the V segment.''')
    group_igblast.add_argument('-s', action='store', nargs='+', dest='seq_files',
                                required=True,
                                help='''List of input FASTA files (with .fasta, .fna or .fa
                                     extension), containing sequences.''')
    group_igblast.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
                                help='''Table file containing 10X annotations (with .csv or .tsv
                                     extension).''')
    group_igblast.add_argument('--asis-id', action='store_true', dest='asis_id',
                                help='''Specify to prevent input sequence headers from being parsed
                                     to add new columns to database. Parsing of sequence headers requires
                                     headers to be in the pRESTO annotation format, so this should be specified
                                     when sequence headers are incompatible with the pRESTO annotation scheme.
                                     Note, unrecognized header formats will default to this behavior.''')
    group_igblast.add_argument('--asis-calls', action='store_true', dest='asis_calls',
                                help='''Specify to prevent gene calls from being parsed into standard allele names
                                     in both the IgBLAST output and reference database. Note, this requires
                                     the sequence identifiers in the reference sequence set and the IgBLAST
                                     database to be exact string matches.''')
    group_igblast.add_argument('--partial', action='store_true', dest='partial',
                                help='''If specified, include incomplete V(D)J alignments in
                                     the pass file instead of the fail file. An incomplete alignment
                                     is defined as a record for which a valid IMGT-gapped sequence 
                                     cannot be built or that is missing a V gene assignment, 
                                     J gene assignment, junction region, or productivity call.''')
    group_igblast.add_argument('--extended', action='store_true', dest='extended',
                               help='''Specify to include additional aligner specific fields in the output. 
                                    Adds <vdj>_score, <vdj>_identity, <vdj>_support, <vdj>_cigar,
                                    fwr1, fwr2, fwr3, fwr4, cdr1, cdr2 and cdr3.''')
    group_igblast.add_argument('--regions', action='store', dest='regions',
                               choices=('default', 'rhesus-igl'), default='default',
                               help='''IMGT CDR and FWR boundary definition to use.''')
    # Nucleotide mode: same handler as igblast-aa, distinguished by amino_acid
    parser_igblast.set_defaults(func=parseIgBLAST, amino_acid=False)

    # igblastp output parser
    parser_igblast_aa = subparsers.add_parser('igblast-aa', parents=[parser_parent],
                                           formatter_class=CommonHelpFormatter, add_help=False,
                                           help='Process igblastp output.',
                                           description='Process igblastp output.')
    group_igblast_aa = parser_igblast_aa.add_argument_group('aligner parsing arguments')
    group_igblast_aa.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                                  required=True,
                                  help='''IgBLAST output files in format 7 with query sequence
                                       (igblastp argument \'-outfmt "7 std qseq sseq btop"\').''')
    group_igblast_aa.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                                  help='''List of folders and/or fasta files containing
                                       the same germline set used in the IgBLAST alignment. These
                                       reference sequences must contain IMGT-numbering spacers (gaps)
                                       in the V segment.''')
    group_igblast_aa.add_argument('-s', action='store', nargs='+', dest='seq_files', required=True,
                                  help='''List of input FASTA files (with .fasta, .fna or .fa
                                       extension), containing sequences.''')
    group_igblast_aa.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
                                  help='''Table file containing 10X annotations (with .csv or .tsv extension).''')
    group_igblast_aa.add_argument('--asis-id', action='store_true', dest='asis_id',
                                  help='''Specify to prevent input sequence headers from being parsed
                                       to add new columns to database. Parsing of sequence headers requires
                                       headers to be in the pRESTO annotation format, so this should be specified
                                       when sequence headers are incompatible with the pRESTO annotation scheme.
                                       Note, unrecognized header formats will default to this behavior.''')
    group_igblast_aa.add_argument('--asis-calls', action='store_true', dest='asis_calls',
                                  help='''Specify to prevent gene calls from being parsed into standard allele names
                                       in both the IgBLAST output and reference database. Note, this requires
                                       the sequence identifiers in the reference sequence set and the IgBLAST
                                       database to be exact string matches.''')
    group_igblast_aa.add_argument('--extended', action='store_true', dest='extended',
                                  help='''Specify to include additional aligner specific fields in the output. 
                                       Adds v_score, v_identity, v_support, v_cigar, fwr1, fwr2, fwr3, cdr1 and cdr2.''')
    group_igblast_aa.add_argument('--regions', action='store', dest='regions',
                                  choices=('default', 'rhesus-igl'), default='default',
                                  help='''IMGT CDR and FWR boundary definition to use.''')
    # No --partial option is exposed for amino acid input; partial is fixed
    # to True through the defaults instead.
    parser_igblast_aa.set_defaults(func=parseIgBLAST, partial=True, amino_acid=True)


    # IMGT aligner
    # Unlike the other subcommands, none of -i, -s or -r is marked required.
    parser_imgt = subparsers.add_parser('imgt', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter, add_help=False,
                                        help='''Process IMGT/HighV-Quest output
                                             (does not work with V-QUEST).''',
                                        description='''Process IMGT/HighV-Quest output
                                             (does not work with V-QUEST).''')
    group_imgt = parser_imgt.add_argument_group('aligner parsing arguments')
    group_imgt.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                            help='''Either zipped IMGT output files (.zip or .txz) or a
                                 folder containing unzipped IMGT output files (which must
                                 include 1_Summary, 2_IMGT-gapped, 3_Nt-sequences,
                                 and 6_Junction).''')
    group_imgt.add_argument('-s', nargs='*', action='store', dest='seq_files', required=False,
                            help='''List of FASTA files (with .fasta, .fna or .fa
                                  extension) that were submitted to IMGT/HighV-QUEST. 
                                  If unspecified, sequence identifiers truncated by IMGT/HighV-QUEST
                                  will not be corrected.''')
    group_imgt.add_argument('-r', nargs='+', action='store', dest='repo', required=False,
                            help='''List of folders and/or fasta files containing
                                 the germline sequence set used by IMGT/HighV-QUEST. 
                                 These reference sequences must contain IMGT-numbering spacers (gaps)
                                 in the V segment. If unspecified, the germline sequence reconstruction 
                                 will not be included in the output.''')
    group_imgt.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
                            help='''Table file containing 10X annotations (with .csv or .tsv
                                 extension).''')
    group_imgt.add_argument('--asis-id', action='store_true', dest='asis_id',
                            help='''Specify to prevent input sequence headers from being parsed
                                 to add new columns to database. Parsing of sequence headers requires
                                 headers to be in the pRESTO annotation format, so this should be specified
                                 when sequence headers are incompatible with the pRESTO annotation scheme.
                                 Note, unrecognized header formats will default to this behavior.''')
    group_imgt.add_argument('--partial', action='store_true', dest='partial',
                            help='''If specified, include incomplete V(D)J alignments in
                                 the pass file instead of the fail file. An incomplete alignment
                                 is defined as a record that is missing a V gene assignment, 
                                 J gene assignment, junction region, or productivity call.''')
    # Help text fix: the placeholder previously read '<vdj>_identity>' with a
    # stray closing bracket.
    group_imgt.add_argument('--extended', action='store_true', dest='extended',
                            help='''Specify to include additional aligner specific fields in the output. 
                                 Adds <vdj>_score, <vdj>_identity, fwr1, fwr2, fwr3, fwr4,
                                 cdr1, cdr2, cdr3, n1_length, n2_length, p3v_length, p5d_length, 
                                 p3d_length, p5j_length and d_frame.''')
    parser_imgt.set_defaults(func=parseIMGT)

    # iHMMuneAlign Aligner
    parser_ihmm = subparsers.add_parser('ihmm', parents=[parser_parent],
                                        formatter_class=CommonHelpFormatter, add_help=False,
                                        help='Process iHMMune-Align output.',
                                        description='Process iHMMune-Align output.')
    group_ihmm = parser_ihmm.add_argument_group('aligner parsing arguments')
    group_ihmm.add_argument('-i', nargs='+', action='store', dest='aligner_files',
                             required=True,
                             help='''iHMMune-Align output file.''')
    group_ihmm.add_argument('-r', nargs='+', action='store', dest='repo', required=True,
                             help='''List of folders and/or FASTA files containing
                                   the set of germline sequences used by iHMMune-Align. These
                                   reference sequences must contain IMGT-numbering spacers (gaps)
                                   in the V segment.''')
    group_ihmm.add_argument('-s', action='store', nargs='+', dest='seq_files',
                             required=True,
                             help='''List of input FASTA files (with .fasta, .fna or .fa
                                  extension) containing sequences.''')
    group_ihmm.add_argument('--10x', action='store', nargs='+', dest='cellranger_file',
                                help='''Table file containing 10X annotations (with .csv or .tsv
                                     extension).''')
    group_ihmm.add_argument('--asis-id', action='store_true', dest='asis_id',
                             help='''Specify to prevent input sequence headers from being parsed
                                  to add new columns to database. Parsing of sequence headers requires
                                  headers to be in the pRESTO annotation format, so this should be specified
                                  when sequence headers are incompatible with the pRESTO annotation scheme.
                                  Note, unrecognized header formats will default to this behavior.''')
    group_ihmm.add_argument('--partial', action='store_true', dest='partial',
                             help='''If specified, include incomplete V(D)J alignments in
                                  the pass file instead of the fail file. An incomplete alignment
                                     is defined as a record for which a valid IMGT-gapped sequence 
                                     cannot be built or that is missing a V gene assignment, 
                                     J gene assignment, junction region, or productivity call.''')
    group_ihmm.add_argument('--extended', action='store_true', dest='extended',
                             help='''Specify to include additional aligner specific fields in the output. 
                                  Adds the path score of the iHMMune-Align hidden Markov model as vdj_score;
                                  adds fwr1, fwr2, fwr3, fwr4, cdr1, cdr2 and cdr3.''')
    parser_ihmm.set_defaults(func=parseIHMM)

    return parser
Esempio n. 14
0
def getArgParser():
    """
    Builds the command line parser for the clonal assignment options

    Arguments:
      None

    Returns:
      ArgumentParser: parser that inherits from the common parent parser,
                      carries the 'cloning arguments' option group and has
                      the group_func and clone_func defaults set.
    """
    # Epilog text listing the output files and the fields read and written
    fields = dedent('''
             output files:
                 clone-pass
                     database with assigned clonal group numbers.
                 clone-fail
                     database with records failing clonal grouping.

             required fields:
                 sequence_id, v_call, j_call, junction
                
             output fields:
                 clone_id
             ''')

    # Shared options come from the common parent parser; add_help is disabled
    # on this parser, as in the ArgumentParser call below
    common_parser = getCommonArgParser(format=True, multiproc=True)
    parser = ArgumentParser(description=__doc__, epilog=fields,
                            parents=[common_parser],
                            formatter_class=CommonHelpFormatter,
                            add_help=False)

    # Option table for the cloning group as (flag, add_argument keywords);
    # entries are registered in exactly the order listed here
    cloning_options = (
        ('--sf', dict(
            action='store', dest='seq_field', default=None,
            help='''Field to be used to calculate distance between records.
                              Defaults to junction (airr) or JUNCTION (changeo).''')),
        ('--vf', dict(
            action='store', dest='v_field', default=None,
            help='''Field containing the germline V segment call.
                             Defaults to v_call (airr) or V_CALL (changeo).''')),
        ('--jf', dict(
            action='store', dest='j_field', default=None,
            help='''Field containing the germline J segment call.
                             Defaults to j_call (airr) or J_CALL (changeo).''')),
        ('--gf', dict(
            nargs='+', action='store', dest='group_fields', default=None,
            help='Additional fields to use for grouping clones aside from V, J and junction length.')),
        ('--mode', dict(
            action='store', dest='mode', choices=('allele', 'gene'),
            default=default_index_mode,
            help='''Specifies whether to use the V(D)J allele or gene for
                             initial grouping.''')),
        ('--act', dict(
            action='store', dest='action', choices=('first', 'set'),
            default=default_index_action,
            help='''Specifies how to handle multiple V(D)J assignments for initial grouping. 
                             The "first" action will use only the first gene listed.
                             The "set" action will use all gene assignments and construct a larger gene
                             grouping composed of any sequences sharing an assignment or linked to another
                             sequence by a common assignment (similar to single-linkage).''')),
        ('--model', dict(
            action='store', dest='model', choices=choices_distance_model,
            default=default_distance_model,
            help='''Specifies which substitution model to use for calculating distance
                             between sequences. The "ham" model is nucleotide Hamming distance and
                             "aa" is amino acid Hamming distance. The "hh_s1f" and "hh_s5f" models are
                             human specific single nucleotide and 5-mer content models, respectively,
                             from Yaari et al, 2013. The "mk_rs1nf" and "mk_rs5nf" models are
                             mouse specific single nucleotide and 5-mer content models, respectively,
                             from Cui et al, 2016. The "m1n_compat" and "hs1f_compat" models are
                             deprecated models provided backwards compatibility with the "m1n" and
                             "hs1f" models in Change-O v0.3.3 and SHazaM v0.1.4. Both
                             5-mer models should be considered experimental.''')),
        ('--dist', dict(
            action='store', dest='distance', type=float,
            default=default_distance,
            help='The distance threshold for clonal grouping')),
        ('--norm', dict(
            action='store', dest='norm', choices=('len', 'mut', 'none'),
            default=default_norm,
            help='''Specifies how to normalize distances. One of none
                             (do not normalize), len (normalize by length),
                             or mut (normalize by number of mutations between sequences).''')),
        ('--sym', dict(
            action='store', dest='sym', choices=('avg', 'min'),
            default=default_sym,
            help='''Specifies how to combine asymmetric distances. One of avg
                             (average of A->B and B->A) or min (minimum of A->B and B->A).''')),
        ('--link', dict(
            action='store', dest='linkage',
            choices=('single', 'average', 'complete'),
            default=default_linkage,
            help='''Type of linkage to use for hierarchical clustering.''')),
        ('--maxmiss', dict(
            action='store', dest='max_missing', type=int,
            default=default_max_missing,
            help='''The maximum number of non-ACGT characters (gaps or Ns) to 
                             permit in the junction sequence before excluding the record 
                             from clonal assignment. Note, under single linkage 
                             non-informative positions can create artifactual links 
                             between unrelated sequences. Use with caution.''')),
    )

    # Distance cloning method
    group = parser.add_argument_group('cloning arguments')
    for flag, settings in cloning_options:
        group.add_argument(flag, **settings)

    # Callables stored on the parsed namespace as group_func and clone_func
    parser.set_defaults(group_func=groupByGene, clone_func=distanceClones)

    return parser