コード例 #1
0
# Add ./python to the module search path at index 1 (i.e. just after sys.path[0]).
# This has to run before the non-stdlib imports below -- presumably clusterpath,
# seqfileopener and utils live in ./python (confirm). The path is relative, so it is
# resolved against the current working directory when the script is run.
# NOTE(review): `sys` is not imported in the visible part of this file -- presumably
# `import sys` sits above this line; confirm.
sys.path.insert(1, './python')
import csv
import argparse

# project modules (not stdlib)
from clusterpath import ClusterPath
from seqfileopener import get_seqfile_info
import utils

# Command-line interface, declared as (flag, add_argument keyword options) pairs and
# registered in order, so the --help listing keeps the same ordering.
parser = argparse.ArgumentParser()
for flag, options in (
    ('--infname', {'required': True}),
    ('--dont-abbreviate', {'action': 'store_true', 'help': "Print full seq IDs (otherwise just prints an 'o')"}),
    ('--n-to-print', {'type': int, 'help': 'How many partitions to print (centered on the best partition)'}),
    ('--datadir', {'default': 'data/imgt'}),
    ('--simfname', {}),
    ('--is-data', {'action': 'store_true'}),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()

# Load the germline reference information from --datadir.
germline_seqs = utils.read_germlines(args.datadir)
cyst_positions = utils.read_cyst_positions(args.datadir)
with open(args.datadir + '/j_tryp.csv') as csv_file:  # get location of <end> tryptophan in each j region
    tryp_reader = csv.reader(csv_file)
    # The file starts with a header line; skip it so it doesn't end up as a bogus
    # entry in the dict. (The default of None keeps an empty file from raising.)
    next(tryp_reader, None)
    # Map first column -> second column. Values are kept as the raw csv strings (no
    # int conversion here). Blank lines come back from csv.reader as empty lists and
    # are skipped rather than raising IndexError.
    tryp_positions = {row[0]: row[1] for row in tryp_reader if row}

# reco_info is optional: it is only read (from --simfname) when that option was given,
# and is otherwise passed on as None.
if args.simfname is None:
    reco_info = None
else:
    input_info, reco_info = get_seqfile_info(args.simfname, args.is_data, germline_seqs, cyst_positions, tryp_positions)

# Read --infname into a ClusterPath and print its partitions.
cp = ClusterPath()
cp.readfile(args.infname)
cp.print_partitions(
    abbreviate=not args.dont_abbreviate,
    n_to_print=args.n_to_print,
    reco_info=reco_info,
)
コード例 #2
0
# Command-line interface: one positional input fasta plus optional output/reference dirs.
parser = argparse.ArgumentParser()
parser.add_argument('ighv_fname', help='input germline v set (presumably a new one), in fasta')
parser.add_argument('--dirname', help='directory name for output (if not specified, we use <ighv_fname> with suffix removed)')
parser.add_argument('--reference-dir', default='data/imgt', help='directory with reference/old germline sets')
args = parser.parse_args()
if args.dirname is None:
    # Default output directory: the input path with its extension stripped.
    # (Was `os.path.os.path.splitext`, which only worked because the os.path module
    # happens to expose `os` as an attribute.)
    args.dirname = os.path.splitext(args.ighv_fname)[0]

# Bare file names (no directory component) used by this script.
aligned_fname = 'ighv-aligned.fasta'
unaligned_fname = 'ighv.fasta'
files_to_copy = [
    'ighd.fasta',
    'ighj.fasta',
    'j_tryp.csv',
]

# ----------------------------------------------------------------------------------------
# work out which v genes still need aligning: the ones in the new set that are absent
# from the old (reference) aligned set
old_aligned_genes = utils.read_germlines(args.reference_dir, only_region='v', aligned=True)
# all genes in ighv_fname, not just the new ones
# NOTE(review): this reads from args.dirname, not from args.ighv_fname directly; the
# commented-out copy step further down is what would put the input fasta there -- confirm.
all_new_genes = utils.read_germlines(args.dirname, only_region='v')
genes_without_alignments = {
    gene: all_new_genes['v'][gene]
    for gene in all_new_genes['v']
    if gene not in old_aligned_genes['v']
}

# NOTE(review): the output-directory setup below (clean the dir, copy the input fasta,
# align the genes that lack alignments, copy the remaining reference files) is
# commented out, so none of it currently runs.
# clean_dir()
# shutil.copyfile(args.ighv_fname, args.dirname + '/' + unaligned_fname)
# if len(genes_without_alignments) > 0:
#     align_new_genes(old_aligned_genes['v'], genes_without_alignments, all_new_genes['v'])
# for fname in files_to_copy:
#     shutil.copyfile(args.reference_dir + '/' + fname, args.dirname + '/' + fname)

# Read the cyst positions from the reference dir and hand them to write_cyst_file().
# write_cyst_file is not defined in the visible part of this file -- presumably it
# writes the cyst-position file for the new germline set; confirm.
known_cyst_positions = utils.read_cyst_positions(args.reference_dir)
write_cyst_file(known_cyst_positions)