コード例 #1
0
    # Load the patient table; when explicit names are given, keep only those rows.
    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]

    # NOTE(review): nothing is appended to `data` in the visible excerpt;
    # presumably it collects the per-window results further down — confirm.
    data = []
    for pname, patient in patients.iterrows():
        if VERBOSE >= 1:
            print patient.code, start, end

        # In submit mode, hand this patient over to fork_self with the same
        # window parameters and skip the in-process computation below.
        # (presumably fork_self launches a separate job — confirm.)
        if submit:
            fork_self(patient.code, width, gap, start, end, VERBOSE=VERBOSE,
                      freqmin=freqmin, countmin=countmin)
            continue

        # Replace the table row by a Patient object; the length of its
        # 'genomewide' reference bounds the window coordinates.
        patient = Patient(patient)
        ref = patient.get_reference('genomewide')
        L = len(ref)

        # Iterate over windows beginning at `start`. The loop keeps going
        # while win_start + width - gap is below min(L, end).
        # NOTE(review): the update of win_start is outside this excerpt.
        win_start = start
        while win_start + width - gap < min(L, end):
            # Clip the window end to the requested end and the reference length.
            win_end = min(win_start + width, end, L)

            if VERBOSE >= 1:
                print patient.code, win_start, win_end
    
            if VERBOSE >= 2:
                print 'Get region haplotypes'
            try:
                datum = patient.get_local_haplotype_count_trajectories(\
                               'genomewide',
                               start=win_start,
コード例 #2
0
    # Load the patient table, optionally restricted to the names in pnames,
    # and abort when no patient is left.
    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]
    if VERBOSE >= 3:
        print 'patients', patients.index
    if not len(patients):
        raise ValueError('No patients found!')

    # Two-level mapping; inner dicts are created on first access.
    # NOTE(review): entries are not assigned within the visible excerpt.
    maps_coord = defaultdict(dict)
    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        # Make maps for all annotations if not explicit: use the ids of all
        # features of the annotated genomewide reference, plus 'genomewide'.
        # `map(...) + [...]` relies on Python 2, where map returns a list.
        if regions is None:
            patseqann = patient.get_reference('genomewide', format='gb')
            regionspat = map(attrgetter('id'),
                             patseqann.features) + ['genomewide']
        else:
            regionspat = regions

        for region in regionspat:
            if VERBOSE >= 1:
                print pname, region

            # Sequences to be mapped onto each other: the custom reference
            # `refname` and the patient's own reference, both for this region.
            refseq = load_custom_reference(refname, format='gb', region=region)
            patseq = patient.get_reference(region)

            # Build the coordinate map and convert it to an integer array.
            mapco = build_coordinate_map(refseq, patseq, VERBOSE=VERBOSE)
            mapco = np.array(mapco, int)
            # The return value is discarded — presumably shift_mapco modifies
            # `mapco` in place; confirm against its definition.
            shift_mapco(mapco, refname, region)
コード例 #3
0
    # Unpack the remaining command-line options from the parsed arguments.
    regions = args.regions
    VERBOSE = args.verbose
    save_to_file = args.save

    # Load the patient table, optionally restricted to the names in pnames,
    # and abort when no patient is left.
    patients = load_patients()
    if pnames is not None:
        patients = patients.loc[pnames]
    if VERBOSE >= 3:
        print 'patients', patients.index
    if not len(patients):
        raise ValueError('No patients found!')

    # Two-level mapping; inner dicts are created on first access.
    # NOTE(review): entries are not assigned within the visible excerpt.
    maps_coord = defaultdict(dict)
    for pname, patient in patients.iterrows():
        patient = Patient(patient)

        # Make maps for all annotations if not explicit: use the ids of all
        # features of the annotated genomewide reference, plus 'genomewide'.
        # `map(...) + [...]` relies on Python 2, where map returns a list.
        if regions is None:
            patseqann = patient.get_reference('genomewide', format='gb')
            regionspat = map(attrgetter('id'), patseqann.features) + ['genomewide']
        else:
            regionspat = regions

        for region in regionspat:
            if VERBOSE >= 1:
                print pname, region

            # Ask the patient object for the coordinate map of this region
            # with respect to the reference called `refname`.
            coomap = patient.get_map_coordinates_reference(region,
                                                           refname=refname)

コード例 #4
0
        # NOTE(review): this excerpt starts inside a loop over patients
        # (see the `continue` below); the loop header is not visible here.
        if VERBOSE >= 1:
            print patient.code, start, end

        # In submit mode, hand this patient over to fork_self with the same
        # window parameters and skip the in-process computation below.
        # (presumably fork_self launches a separate job — confirm.)
        if submit:
            fork_self(patient.code,
                      width,
                      gap,
                      start,
                      end,
                      VERBOSE=VERBOSE,
                      freqmin=freqmin,
                      countmin=countmin)
            continue

        # Replace the table row by a Patient object; the length of its
        # 'genomewide' reference bounds the window coordinates.
        patient = Patient(patient)
        ref = patient.get_reference('genomewide')
        L = len(ref)

        # Iterate over windows beginning at `start`. The loop keeps going
        # while win_start + width - gap is below min(L, end).
        # NOTE(review): the update of win_start is outside this excerpt.
        win_start = start
        while win_start + width - gap < min(L, end):
            # Clip the window end to the requested end and the reference length.
            win_end = min(win_start + width, end, L)

            if VERBOSE >= 1:
                print patient.code, win_start, win_end

            if VERBOSE >= 2:
                print 'Get region haplotypes'
            try:
                datum = patient.get_local_haplotype_count_trajectories(\
                               'genomewide',
                               start=win_start,
コード例 #5
0
                        help='Verbosity level [0-4]')

    # Parse the command line and unpack the options used below.
    args = parser.parse_args()
    pnames = args.patients
    regions = args.regions
    VERBOSE = args.verbose

    patients = load_patients()
    # Truthiness test: both None and an empty list leave the table unfiltered.
    if pnames:
        patients = patients.loc[pnames]

    # Alignments collected over all patients, keyed by (region, pname).
    alis = {}
    for pname, patient in patients.iterrows():
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        # Guess regions if not specified: ids of all features of the annotated
        # genomewide reference, plus 'genomewide' itself.
        # `map(...) + [...]` relies on Python 2, where map returns a list.
        if regions is None:
            refseq_gw = patient.get_reference('genomewide', 'gb')
            regionspat = map(attrgetter('id'), refseq_gw.features) + ['genomewide']
        else:
            regionspat = regions

        for region in regionspat:
            if VERBOSE >= 1:
                print pname, region

            ali = patient.get_consensi_alignment(region)

            alis[(region, pname)] = ali
コード例 #6
0
    # Prepare output structures
    n_binsx = 5
    # Twelve values from 0 to 1, symmetric around 0.5 and denser towards the
    # extremes (presumably frequency bin edges — confirm against Propagator).
    binsy = [
        0., 0.002, 0.01, 0.025, 0.12, 0.33, 0.67, 0.88, 0.975, 0.99, 0.998, 1.
    ]
    # One Propagator per (gene, 'syn'/'nonsyn') combination.
    props = {(gene, synkey): Propagator(n_binsx,
                                        binsy=binsy,
                                        use_logit=use_logit)
             for gene in genes for synkey in ('syn', 'nonsyn')}

    for pname, patient in patients.iterrows():
        patient = Patient(patient)
        # NOTE(review): samplenames is not used within the visible excerpt.
        samplenames = patient.samples.index

        refseq = patient.get_reference('genomewide', format='gb')

        for gene in genes:

            # Trailing comma (Python 2): stay on the same output line so the
            # fragment list printed below follows directly.
            if VERBOSE >= 1:
                print pname, gene,

            # Get the right fragment(s)
            # FIXME: do better than this ;-)
            # Hard-coded lookup rebuilt on every iteration; any gene other
            # than 'pol', 'gag' or 'env' raises KeyError here.
            frags = {'pol': ['F2', 'F3'], 'gag': ['F1'], 'env': ['F5', 'F6']}
            fragments = frags[gene]

            if VERBOSE >= 1:
                print fragments

            for fragment in fragments:
コード例 #7
0
        # In verbose mode, print every annotated feature of the new reference:
        # its id followed by the extracted sequence (translated for features
        # of type 'gene' or 'protein').
        if VERBOSE >= 1:
            for feature in refseq.features:
                # Skip features whose id starts with 'F'
                # (presumably the sequencing fragments F1, F2, ... — confirm).
                if feature.id[0] == 'F':
                    continue

                # Trailing comma (Python 2): the sequence follows on the same line.
                print feature.id, 
                # NOTE(review): `genes` is imported on every iteration but is
                # not used within the visible excerpt.
                from hivwholeseq.utils.genome_info import genes
                if feature.type in ('gene', 'protein'):
                    print feature.extract(refseq).seq.translate()
                else:
                    print feature.extract(refseq).seq
            print ''


        # Try to load the annotated reference currently stored for the
        # patient; a missing file (IOError) is tolerated.
        try:
            refseq_old = patient.get_reference('genomewide', format='gb')
        except IOError:
            if VERBOSE >= 1:
                print "Old annotated reference not found (that's ok)"
            refseq_old = None

        # Compare new and old annotations. A ValueError from the comparison
        # is only reported when use_force is set; otherwise it is re-raised.
        if refseq_old is not None:
            try:
                compare_annotations(refseq, refseq_old, VERBOSE=VERBOSE)
            except ValueError:
                if use_force:
                    print 'Annotations differ from old sequence'
                else:
                    raise

        if use_save:
コード例 #8
0
    # 'genomewide' is not processed in the per-fragment loop below: remember
    # that it was requested and remove it from the list (in place).
    if 'genomewide' in fragments:
        do_genomewide = True
        del fragments[fragments.index('genomewide')]
    else:
        do_genomewide = False

    for pname, patient in patients.iterrows():
        # The patient name is printed regardless of the verbosity level.
        print pname
        patient = Patient(patient)
        patient.discard_nonsequenced_samples()

        for fragment in fragments:
            if VERBOSE >= 1:
                print fragment

            # Current reference as an array of single characters.
            ref = patient.get_reference(fragment)
            refm = np.array(ref, 'S1')
            # Consensus from the initial allele frequencies: index of the
            # largest value along axis 0, looked up in `alpha`.
            # (assumes af0 is indexed as (allele, position) — TODO confirm.)
            af0 = patient.get_initial_allele_frequencies(fragment)
            consm = alpha[af0.argmax(axis=0)]

            seqm = consm.copy()
            # Gaps in a reference are not wished for
            # -> fall back to the character of the current reference there.
            if '-' in seqm:
                seqm[seqm == '-'] = refm[seqm == '-']

            # New record carrying the consensus sequence but the id, name and
            # description of the existing reference.
            seq = SeqRecord(Seq(''.join(seqm), ambiguous_dna),
                            id=ref.id,
                            name=ref.name,
                            description=ref.description)

            if VERBOSE >= 2:
コード例 #9
0
    # Parse the command line and unpack the options used below.
    args = parser.parse_args()
    VERBOSE = args.verbose
    fragments = args.fragments

    patients = load_patients()

    for fragment in fragments:
        if VERBOSE >= 1:
            print fragment

        # Collect the reference of this fragment from every patient.
        refs = []
        for pname, patient in patients.iterrows():
            if VERBOSE >= 2:
                print pname
            patient = Patient(patient)
            refs.append(patient.get_reference(fragment))

        # Each reference is passed to align_muscle as a separate positional argument.
        ali = align_muscle(*refs, sort=True)

        # Check whether all references are complete (using the longest primers)
        # by rejecting any gap within the first or last four alignment columns.
        # (assumes np.array(ali) is indexed as (sequence, column) — TODO confirm.)
        if VERBOSE >= 2:
            print 'Check alignment'
        alim = np.array(ali)
        if (alim[:, :4] == '-').any():
            raise ValueError('Gaps at the beginning of fragment found')
        elif (alim[:, -4:] == '-').any():
            raise ValueError('Gaps at the end of fragment found')

        if VERBOSE >= 2:
            print 'Save to file'
        # Output path for the alignment of this fragment; the actual write is
        # outside the visible excerpt.
        fn = get_reference_alignment_filename(fragment)
コード例 #10
0
    # Parse the command line and unpack the options used below.
    args = parser.parse_args()
    VERBOSE = args.verbose
    fragments = args.fragments

    patients = load_patients()

    for fragment in fragments:
        if VERBOSE >= 1:
            print fragment

        # Collect the reference of this fragment from every patient.
        refs = []
        for pname, patient in patients.iterrows():
            if VERBOSE >= 2:
                print pname
            patient = Patient(patient)
            refs.append(patient.get_reference(fragment))

        # Each reference is passed to align_muscle as a separate positional argument.
        ali = align_muscle(*refs, sort=True)

        # Check whether all references are complete (using the longest primers)
        # by rejecting any gap within the first or last four alignment columns.
        # (assumes np.array(ali) is indexed as (sequence, column) — TODO confirm.)
        if VERBOSE >= 2:
            print 'Check alignment'
        alim = np.array(ali)
        if (alim[:, :4] == '-').any():
            raise ValueError('Gaps at the beginning of fragment found')
        elif (alim[:, -4:] == '-').any():
            raise ValueError('Gaps at the end of fragment found')

        if VERBOSE >= 2:
            print 'Save to file'
        # Output path for the alignment of this fragment; the actual write is
        # outside the visible excerpt.
        fn = get_reference_alignment_filename(fragment)
コード例 #11
0
        patients = patients.loc[patients.code != "p7"]

    # Prepare output structures
    n_binsx = 5
    # Twelve values from 0 to 1, symmetric around 0.5 and denser towards the
    # extremes (presumably frequency bin edges — confirm against Propagator).
    binsy = [0.0, 0.002, 0.01, 0.025, 0.12, 0.33, 0.67, 0.88, 0.975, 0.99, 0.998, 1.0]
    # One Propagator per (gene, "syn"/"nonsyn") combination.
    props = {
        (gene, synkey): Propagator(n_binsx, binsy=binsy, use_logit=use_logit)
        for gene in genes
        for synkey in ("syn", "nonsyn")
    }

    for pname, patient in patients.iterrows():
        patient = Patient(patient)
        # NOTE(review): samplenames is not used within the visible excerpt.
        samplenames = patient.samples.index

        refseq = patient.get_reference("genomewide", format="gb")

        for gene in genes:

            # Trailing comma (Python 2): stay on the same output line so the
            # fragment list printed below follows directly.
            if VERBOSE >= 1:
                print pname, gene,

            # Get the right fragment(s)
            # FIXME: do better than this ;-)
            # Hard-coded lookup rebuilt on every iteration; any gene other
            # than "pol", "gag" or "env" raises KeyError here.
            frags = {"pol": ["F2", "F3"], "gag": ["F1"], "env": ["F5", "F6"]}
            fragments = frags[gene]

            if VERBOSE >= 1:
                print fragments

            for fragment in fragments:
コード例 #12
0
        # In verbose mode, print every annotated feature of the new reference:
        # its id followed by the extracted sequence (translated for features
        # of type 'gene' or 'protein').
        if VERBOSE >= 1:
            for feature in refseq.features:
                # Skip features whose id starts with 'F'
                # (presumably the sequencing fragments F1, F2, ... — confirm).
                if feature.id[0] == 'F':
                    continue

                # Trailing comma (Python 2): the sequence follows on the same line.
                print feature.id,
                # NOTE(review): `genes` is imported on every iteration but is
                # not used within the visible excerpt.
                from hivwholeseq.utils.genome_info import genes
                if feature.type in ('gene', 'protein'):
                    print feature.extract(refseq).seq.translate()
                else:
                    print feature.extract(refseq).seq
            print ''

        # Try to load the annotated reference currently stored for the
        # patient; a missing file (IOError) is tolerated.
        try:
            refseq_old = patient.get_reference('genomewide', format='gb')
        except IOError:
            if VERBOSE >= 1:
                print "Old annotated reference not found (that's ok)"
            refseq_old = None

        # Compare new and old annotations. A ValueError from the comparison
        # is only reported when use_force is set; otherwise it is re-raised.
        if refseq_old is not None:
            try:
                compare_annotations(refseq, refseq_old, VERBOSE=VERBOSE)
            except ValueError:
                if use_force:
                    print 'Annotations differ from old sequence'
                else:
                    raise

        if use_save: