Ejemplo n.º 1
0
 def cmd_str(iproc):
     """Return the command line (as one space-joined string) for subprocess number <iproc>.

     Starts from a copy of this process's sys.argv, so sys.argv itself is left alone.
     The --n-tests and --iteststart options are removed (has_arg=True, so presumably
     their values go with them -- confirm against utils.remove_from_arglist), and
     --outdir/--seed are pointed at per-process values via utils.replace_in_arglist.
     """
     sub_argv = list(sys.argv)  # work on a copy, not on the live argument list

     for flag in ('--n-tests', '--iteststart'):
         utils.remove_from_arglist(sub_argv, flag, has_arg=True)

     # each subprocess gets its own output subdir and its own seed
     per_proc_vals = [
         ('--outdir', '/'.join([args.outdir, str(iproc)])),
         ('--seed', str(args.seed + iproc)),
     ]
     for flag, val in per_proc_vals:
         utils.replace_in_arglist(sub_argv, flag, val)

     # NOTE appending '--slurm' here is currently disabled
     return ' '.join(sub_argv)
Ejemplo n.º 2
0
 def cmd_str(iproc):
     """Return the command line (as one space-joined string) for subprocess number <iproc>.

     Starts from a copy of this process's sys.argv, so sys.argv itself is left alone.
     --comprehensive and --n-tests are removed (the latter with has_arg=True, so
     presumably its value goes with it -- confirm against utils.remove_from_arglist),
     and --outdir/--seed are pointed at per-process values via utils.replace_in_arglist.
     """
     sub_argv = list(sys.argv)  # work on a copy, not on the live argument list

     utils.remove_from_arglist(sub_argv, '--comprehensive')
     utils.remove_from_arglist(sub_argv, '--n-tests', has_arg=True)

     # each subprocess gets its own output subdir and its own seed
     per_proc_vals = [
         ('--outdir', '/'.join([args.outdir, str(iproc)])),
         ('--seed', str(args.seed + iproc)),
     ]
     for flag, val in per_proc_vals:
         utils.replace_in_arglist(sub_argv, flag, val)

     # NOTE appending '--slurm' here is currently disabled
     return ' '.join(sub_argv)
Ejemplo n.º 3
0
# ----------------------------------------------------------------------------------------
# Command line arguments (the parser itself, and any arguments before 'outfile', are set up earlier in the file).

# where to write, and which extra information to write
parser.add_argument('outfile', help='output file name (fasta or csv/tsv)')
parser.add_argument('--extra-columns', help='colon-separated list of additional partis output columns (beyond sequences), to write to the output file. If writing to a fasta file, the column values are appended after the sequence name, separated by --fasta-info-separator. If writing to csv/tsv, they\'re written as proper, labeled columns.')

# which partition/cluster(s) to take sequences from (default: all clusters in the best partition)
parser.add_argument('--partition-index', type=int, help='if set, use the partition at this index in the cluster path, rather than the default of using the best partition')
parser.add_argument('--seed-unique-id', help='if set, take sequences only from the cluster containing this seed sequence, rather than the default of taking all sequences from all clusters')
parser.add_argument('--cluster-index', type=int, help='if set, take sequences only from the cluster at this index in the partition, rather than the default of taking all sequences from all clusters. This index is with respect to the cluster order found in the file (which, in contrast to plots made by --plotdir, is *not* sorted by size)')

# which version of each sequence to write
parser.add_argument('--indel-reversed-seqs', action='store_true', help='if set, take sequences that have had any shm indels "reversed" (i.e. insertions are reversed, and deletions are replaced with the germline bases) rather than the default of using sequences from the original input file. Indel-reversed sequences can be convenient because they are by definition the same length as and aligned to the naive sequence.')

# germline info, only needed when reading old-style csv output files
parser.add_argument('--glfo-dir', help='Directory with germline info. Only necessary for old-style csv output files. Equivalent to a parameter dir with \'/hmm/germline-sets\' appended.')
parser.add_argument('--locus', default='igh', help='only used for old-style csv output files')

# plotting, and fasta formatting
parser.add_argument('--plotdir', help='if set, plot annotation parameters from --fname to --plotdir and exit (you still have to set outfile, sorry, it\'s nice having it be a positional arg, but it doesn\'t get used). To add e.g. per-gene-per-position plots comment/uncomment args in the call below.')
parser.add_argument('--fasta-info-separator', default=' ', help='character to use to separate the sequence name and each --extra-columns value on the name line when writing to a fasta file')  # help text used to be cut off after 'character to use '

# Backwards compatibility: the old extract-fasta.py script is now just a link to this one, so if
# that's how we were invoked, print a warning and rejigger the arguments so it still works.
if 'extract-fasta.py' in sys.argv[0]:
    deprecated_script_name = os.path.basename(sys.argv[0])
    current_script_name = os.path.basename(os.path.realpath(__file__))
    print('  note: running deprecated script %s, which currently is just a link pointing to %s' % (deprecated_script_name, current_script_name))
    print('  note: transferring deprecated arguments --input-file and --fasta-output-file to the first two positional arguments (this will continue to work, you only need to change things if you want this warning to go away)')

    deprecated_flags = ['--input-file', '--fasta-output-file']
    # pull out the old flags' values first, pass them to insert_in_arglist() together with
    # sys.argv[0] (presumably so they land right after the script name -- confirm against utils),
    # and only then remove the flags themselves
    deprecated_vals = [utils.get_val_from_arglist(sys.argv, dflag) for dflag in deprecated_flags]
    utils.insert_in_arglist(sys.argv, deprecated_vals, sys.argv[0])
    for dflag in deprecated_flags:
        utils.remove_from_arglist(sys.argv, dflag, has_arg=True)

args = parser.parse_args()
args.extra_columns = utils.get_arg_list(args.extra_columns)  # presumably turns the colon-separated string into a list (confirm against utils.get_arg_list)

# Check the output format explicitly rather than with a bare assert: asserts are stripped when
# running with python -O, and an AssertionError tells the user nothing about what they got wrong.
# parser.error() prints the usage plus this message to stderr and exits with status 2.
allowed_outfile_suffixes = ['.csv', '.tsv', '.fa', '.fasta']
if utils.getsuffix(args.outfile) not in allowed_outfile_suffixes:
    parser.error('unsupported suffix for outfile \'%s\' (should be one of: %s)' % (args.outfile, ' '.join(allowed_outfile_suffixes)))

# Germline info to fall back on: per the note printed below, the deprecated csv format needs its
# germline info from a separate directory, so if the user didn't pass --glfo-dir we use this one.
default_glfo_dir = '/'.join([partis_dir, 'data', 'germlines', 'human'])
if utils.getsuffix(args.infile) == '.csv':
    if args.glfo_dir is None:
        print('  note: reading deprecated csv format, so need to get germline info from a separate directory; --glfo-dir was not set, so using default %s. If it doesn\'t crash, it\'s probably ok.' % default_glfo_dir)
        args.glfo_dir = default_glfo_dir

# read germline info, annotations, and cluster path from the input file
glfo, annotation_list, cpath = utils.read_output(
    args.infile,
    glfo_dir=args.glfo_dir,
    locus=args.locus,
)

if args.plotdir is not None:
    from parametercounter import ParameterCounter
    setattr(args, 'region_end_exclusions', {r : [0 for e in ['5p', '3p']] for r in utils.regions})  # hackity hackity hackity