# Script: read a cluster-path file and print its partitions, optionally
# annotated with the true (simulation) info when --simfname is given.

# Insert the project's ./python directory at position 1 of the module search
# path so the project-local imports below resolve.
sys.path.insert(1, './python')
import csv
import argparse

from clusterpath import ClusterPath
from seqfileopener import get_seqfile_info
import utils

# ----------------------------------------------------------------------------------------
# command line: each entry is (flag, keyword arguments for add_argument)
parser = argparse.ArgumentParser()
for flag, flag_options in (
    ('--infname', dict(required=True)),
    ('--dont-abbreviate', dict(action='store_true', help='Print full seq IDs (otherwise just prints an \'o\')')),
    ('--n-to-print', dict(type=int, help='How many partitions to print (centered on the best partition)')),
    ('--datadir', dict(default='data/imgt')),
    ('--simfname', dict()),
    ('--is-data', dict(action='store_true')),
):
    parser.add_argument(flag, **flag_options)
args = parser.parse_args()

# ----------------------------------------------------------------------------------------
# germline info, read from --datadir
germline_seqs = utils.read_germlines(args.datadir)
cyst_positions = utils.read_cyst_positions(args.datadir)

# get location of <end> tryptophan in each j region: first column -> second column
# WARNING: this doesn't filter out the header line, and values are left as the
# strings that csv.reader yields
tryp_fname = args.datadir + '/j_tryp.csv'
with open(tryp_fname) as tryp_file:
    tryp_positions = dict((row[0], row[1]) for row in csv.reader(tryp_file))

# ----------------------------------------------------------------------------------------
# the true/simulation info is only read if a simulation file was specified
if args.simfname is None:
    reco_info = None
else:
    input_info, reco_info = get_seqfile_info(
        args.simfname,
        args.is_data,
        germline_seqs,
        cyst_positions,
        tryp_positions,
    )

# ----------------------------------------------------------------------------------------
# read the partitions from --infname and print them
cp = ClusterPath()
cp.readfile(args.infname)
cp.print_partitions(
    abbreviate=(not args.dont_abbreviate),
    n_to_print=args.n_to_print,
    reco_info=reco_info,
)
# Script: given a (presumably new) germline v set in fasta, work out which v
# genes have no alignment in the reference set, and write the cyst position
# file from the reference directory's known positions.

# ----------------------------------------------------------------------------------------
# command line
parser = argparse.ArgumentParser()
parser.add_argument('ighv_fname', help='input germline v set (presumably a new one), in fasta')
parser.add_argument('--dirname', help='directory name for output (if not specified, we use <infname> with suffix removed)')
parser.add_argument('--reference-dir', default='data/imgt', help='directory with reference/old germline sets')
args = parser.parse_args()
if args.dirname is None:
    # Default output directory: the input fasta path with its extension removed.
    # (This used to be spelled `os.path.os.path.splitext`, which only worked
    # because the path module happens to expose `os` as an attribute.)
    args.dirname = os.path.splitext(args.ighv_fname)[0]

# File names used for the germline set directory layout.
files_to_copy = ['ighd.fasta', 'ighj.fasta', 'j_tryp.csv']
unaligned_fname = 'ighv.fasta'
aligned_fname = 'ighv-aligned.fasta'

# ----------------------------------------------------------------------------------------
# figure out which v genes we need to align
old_aligned_genes = utils.read_germlines(args.reference_dir, only_region='v', aligned=True)
all_new_genes = utils.read_germlines(args.dirname, only_region='v')  # all genes in ighv_fname, not just the new ones
# keep only the genes that are absent from the reference (already aligned) set
genes_without_alignments = {
    gene: all_new_genes['v'][gene]
    for gene in all_new_genes['v']
    if gene not in old_aligned_genes['v']
}

# NOTE: the following steps are currently disabled (left commented out as in the original)
# clean_dir()
# shutil.copyfile(args.ighv_fname, args.dirname + '/' + unaligned_fname)
# if len(genes_without_alignments) > 0:
#     align_new_genes(old_aligned_genes['v'], genes_without_alignments, all_new_genes['v'])
# for fname in files_to_copy:
#     shutil.copyfile(args.reference_dir + '/' + fname, args.dirname + '/' + fname)

# ----------------------------------------------------------------------------------------
# write the cyst position file, starting from the reference directory's known positions
known_cyst_positions = utils.read_cyst_positions(args.reference_dir)
write_cyst_file(known_cyst_positions)