patients = load_patients() if pnames is not None: patients = patients.loc[pnames] data = [] for pname, patient in patients.iterrows(): if VERBOSE >= 1: print patient.code, start, end if submit: fork_self(patient.code, width, gap, start, end, VERBOSE=VERBOSE, freqmin=freqmin, countmin=countmin) continue patient = Patient(patient) ref = patient.get_reference('genomewide') L = len(ref) win_start = start while win_start + width - gap < min(L, end): win_end = min(win_start + width, end, L) if VERBOSE >= 1: print patient.code, win_start, win_end if VERBOSE >= 2: print 'Get region haplotypes' try: datum = patient.get_local_haplotype_count_trajectories(\ 'genomewide', start=win_start,
# For every selected patient and region, build a coordinate map from the
# custom reference `refname` and the patient's own reference sequence.
patients = load_patients()

# Restrict the table to the requested patients when a selection was given.
if pnames is not None:
    patients = patients.loc[pnames]

if VERBOSE >= 3:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('patients %s' % (patients.index,))

# Explicit len() check; NOTE(review): `patients` looks like a pandas table
# (.loc / .iterrows), for which plain truthiness would be ambiguous.
if not len(patients):
    raise ValueError('No patients found!')

maps_coord = defaultdict(dict)
for pname, patient in patients.iterrows():
    patient = Patient(patient)

    # Make maps for all annotations if not explicit
    if regions is None:
        patseqann = patient.get_reference('genomewide', format='gb')
        # list(...) keeps the concatenation valid on Python 3, where map()
        # returns an iterator instead of a list.
        regionspat = list(map(attrgetter('id'), patseqann.features)) + ['genomewide']
    else:
        regionspat = regions

    for region in regionspat:
        if VERBOSE >= 1:
            print('%s %s' % (pname, region))

        refseq = load_custom_reference(refname, format='gb', region=region)
        patseq = patient.get_reference(region)

        mapco = build_coordinate_map(refseq, patseq, VERBOSE=VERBOSE)
        mapco = np.array(mapco, int)
        # Return value is not used; presumably shifts `mapco` in place --
        # TODO confirm against shift_mapco's definition.
        shift_mapco(mapco, refname, region)
# Read the remaining command-line options.
regions = args.regions
VERBOSE = args.verbose
save_to_file = args.save

patients = load_patients()

# Restrict the table to the requested patients when a selection was given.
if pnames is not None:
    patients = patients.loc[pnames]

if VERBOSE >= 3:
    # Single-argument print(...) produces the same output on Python 2 and 3.
    print('patients %s' % (patients.index,))

# Explicit len() check; NOTE(review): `patients` looks like a pandas table
# (.loc / .iterrows), for which plain truthiness would be ambiguous.
if not len(patients):
    raise ValueError('No patients found!')

maps_coord = defaultdict(dict)
for pname, patient in patients.iterrows():
    patient = Patient(patient)

    # Make maps for all annotations if not explicit
    if regions is None:
        patseqann = patient.get_reference('genomewide', format='gb')
        # list(...) keeps the concatenation valid on Python 3, where map()
        # returns an iterator instead of a list.
        regionspat = list(map(attrgetter('id'), patseqann.features)) + ['genomewide']
    else:
        regionspat = regions

    for region in regionspat:
        if VERBOSE >= 1:
            print('%s %s' % (pname, region))

        # Coordinate map between this patient's region and `refname`, as
        # provided by the Patient object.
        coomap = patient.get_map_coordinates_reference(region, refname=refname)
if VERBOSE >= 1: print patient.code, start, end if submit: fork_self(patient.code, width, gap, start, end, VERBOSE=VERBOSE, freqmin=freqmin, countmin=countmin) continue patient = Patient(patient) ref = patient.get_reference('genomewide') L = len(ref) win_start = start while win_start + width - gap < min(L, end): win_end = min(win_start + width, end, L) if VERBOSE >= 1: print patient.code, win_start, win_end if VERBOSE >= 2: print 'Get region haplotypes' try: datum = patient.get_local_haplotype_count_trajectories(\ 'genomewide', start=win_start,
help='Verbosity level [0-4]') args = parser.parse_args() pnames = args.patients regions = args.regions VERBOSE = args.verbose patients = load_patients() if pnames: patients = patients.loc[pnames] alis = {} for pname, patient in patients.iterrows(): patient = Patient(patient) patient.discard_nonsequenced_samples() # Guess regions if not specified if regions is None: refseq_gw = patient.get_reference('genomewide', 'gb') regionspat = map(attrgetter('id'), refseq_gw.features) + ['genomewide'] else: regionspat = regions for region in regionspat: if VERBOSE >= 1: print pname, region ali = patient.get_consensi_alignment(region) alis[(region, pname)] = ali
# Prepare output structures n_binsx = 5 binsy = [ 0., 0.002, 0.01, 0.025, 0.12, 0.33, 0.67, 0.88, 0.975, 0.99, 0.998, 1. ] props = {(gene, synkey): Propagator(n_binsx, binsy=binsy, use_logit=use_logit) for gene in genes for synkey in ('syn', 'nonsyn')} for pname, patient in patients.iterrows(): patient = Patient(patient) samplenames = patient.samples.index refseq = patient.get_reference('genomewide', format='gb') for gene in genes: if VERBOSE >= 1: print pname, gene, # Get the right fragment(s) # FIXME: do better than this ;-) frags = {'pol': ['F2', 'F3'], 'gag': ['F1'], 'env': ['F5', 'F6']} fragments = frags[gene] if VERBOSE >= 1: print fragments for fragment in fragments:
if VERBOSE >= 1: for feature in refseq.features: if feature.id[0] == 'F': continue print feature.id, from hivwholeseq.utils.genome_info import genes if feature.type in ('gene', 'protein'): print feature.extract(refseq).seq.translate() else: print feature.extract(refseq).seq print '' try: refseq_old = patient.get_reference('genomewide', format='gb') except IOError: if VERBOSE >= 1: print "Old annotated reference not found (that's ok)" refseq_old = None if refseq_old is not None: try: compare_annotations(refseq, refseq_old, VERBOSE=VERBOSE) except ValueError: if use_force: print 'Annotations differ from old sequence' else: raise if use_save:
if 'genomewide' in fragments: do_genomewide = True del fragments[fragments.index('genomewide')] else: do_genomewide = False for pname, patient in patients.iterrows(): print pname patient = Patient(patient) patient.discard_nonsequenced_samples() for fragment in fragments: if VERBOSE >= 1: print fragment ref = patient.get_reference(fragment) refm = np.array(ref, 'S1') af0 = patient.get_initial_allele_frequencies(fragment) consm = alpha[af0.argmax(axis=0)] seqm = consm.copy() # Gaps in a reference are not wished for if '-' in seqm: seqm[seqm == '-'] = refm[seqm == '-'] seq = SeqRecord(Seq(''.join(seqm), ambiguous_dna), id=ref.id, name=ref.name, description=ref.description) if VERBOSE >= 2:
# Parse the command line and, per fragment, align the references of all
# patients and verify that no sequence has gaps at either edge.
args = parser.parse_args()
VERBOSE = args.verbose
fragments = args.fragments

patients = load_patients()

for fragment in fragments:
    if VERBOSE >= 1:
        # Single-argument print(...) prints the same on Python 2 and 3.
        print(fragment)

    # Collect this fragment's reference from every patient.
    refs = []
    for pname, patient in patients.iterrows():
        if VERBOSE >= 2:
            print(pname)
        patient = Patient(patient)
        refs.append(patient.get_reference(fragment))

    ali = align_muscle(*refs, sort=True)

    # Check whether all references are complete (using the longest primers)
    if VERBOSE >= 2:
        print('Check alignment')
    alim = np.array(ali)
    # Any gap character in the first or last four alignment columns is
    # treated as an error.
    if (alim[:, :4] == '-').any():
        raise ValueError('Gaps at the beginning of fragment found')
    elif (alim[:, -4:] == '-').any():
        raise ValueError('Gaps at the end of fragment found')

    if VERBOSE >= 2:
        print('Save to file')
    fn = get_reference_alignment_filename(fragment)
patients = patients.loc[patients.code != "p7"] # Prepare output structures n_binsx = 5 binsy = [0.0, 0.002, 0.01, 0.025, 0.12, 0.33, 0.67, 0.88, 0.975, 0.99, 0.998, 1.0] props = { (gene, synkey): Propagator(n_binsx, binsy=binsy, use_logit=use_logit) for gene in genes for synkey in ("syn", "nonsyn") } for pname, patient in patients.iterrows(): patient = Patient(patient) samplenames = patient.samples.index refseq = patient.get_reference("genomewide", format="gb") for gene in genes: if VERBOSE >= 1: print pname, gene, # Get the right fragment(s) # FIXME: do better than this ;-) frags = {"pol": ["F2", "F3"], "gag": ["F1"], "env": ["F5", "F6"]} fragments = frags[gene] if VERBOSE >= 1: print fragments for fragment in fragments: