Python RecombinationEvent.set_reco_id Examples

Programming Language: Python

Namespace/Package Name: event

Method/Function: set_reco_id

Examples at hotexamples.com: 4

Python RecombinationEvent.set_reco_id - 4 examples found. These are the top rated real world Python examples of event.RecombinationEvent.set_reco_id extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

RecombinationEvent(6)

set_reco_id(4)

print_event(2)

recombined_seq(2)

write_event(2)

set_final_codon_positions(1)

set_final_cyst_tryp_positions(1)

set_post_erosion_codon_positions(1)

unmutated_codons(1)

Example #1

Show file

File: bcr-phylo-run.py Project: linchunjen/partis

def parse_bcr_phylo_output(glfo, naive_line, outdir, ievent):
    seqfos = utils.read_fastx(bcr_phylo_fasta_fname(
        outdir))  # output mutated sequences from bcr-phylo

    assert len(
        naive_line['unique_ids']
    ) == 1  # enforces that we ran naive-only, 1-leaf partis simulation above
    assert not indelutils.has_indels(
        naive_line['indelfos'][0])  # would have to handle this below
    if args.debug:
        utils.print_reco_event(naive_line)
    reco_info = collections.OrderedDict()
    for sfo in seqfos:
        mline = copy.deepcopy(naive_line)
        utils.remove_all_implicit_info(mline)
        del mline['tree']
        mline['unique_ids'] = [sfo['name']]
        mline['seqs'] = [
            sfo['seq']
        ]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        mline['input_seqs'] = [
            sfo['seq']
        ]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        mline['duplicates'] = [[]]
        reco_info[sfo['name']] = mline
        utils.add_implicit_info(glfo, mline)
    final_line = utils.synthesize_multi_seq_line_from_reco_info(
        [sfo['name'] for sfo in seqfos], reco_info)
    if args.debug:
        utils.print_reco_event(final_line)

    # extract kd values from pickle file (use a separate script since it requires ete/anaconda to read)
    if args.stype == 'selection':
        cmd = './bin/read-bcr-phylo-trees.py --pickle-tree-file %s/%s_lineage_tree.p --kdfile %s/kd-vals.csv --newick-tree-file %s/simu.nwk' % (
            outdir, args.extrastr, outdir, outdir)
        utils.run_ete_script(cmd, ete_path)
        nodefo = {}
        with open('%s/kd-vals.csv' % outdir) as kdfile:
            reader = csv.DictReader(kdfile)
            for line in reader:
                nodefo[line['uid']] = {
                    'kd': float(line['kd']),
                    'relative_kd': float(line['relative_kd']),
                    'lambda': line.get('lambda', None),
                    'target_index': int(line['target_index']),
                }
        if len(
                set(nodefo) - set(final_line['unique_ids'])
        ) > 0:  # uids in the kd file but not the <line> (i.e. not in the newick/fasta files) are probably just bcr-phylo discarding internal nodes
            print '        in kd file, but missing from final_line (probably just internal nodes that bcr-phylo wrote to the tree without names): %s' % (
                set(nodefo) - set(final_line['unique_ids']))
        if len(set(final_line['unique_ids']) - set(nodefo)) > 0:
            print '        in final_line, but missing from kdvals: %s' % ' '.join(
                set(final_line['unique_ids']) - set(nodefo))
        final_line['affinities'] = [
            1. / nodefo[u]['kd'] for u in final_line['unique_ids']
        ]
        final_line['relative_affinities'] = [
            1. / nodefo[u]['relative_kd'] for u in final_line['unique_ids']
        ]
        final_line['lambdas'] = [
            nodefo[u]['lambda'] for u in final_line['unique_ids']
        ]
        final_line['nearest_target_indices'] = [
            nodefo[u]['target_index'] for u in final_line['unique_ids']
        ]
        tree = treeutils.get_dendro_tree(treefname='%s/simu.nwk' % outdir)
        tree.scale_edges(1. / numpy.mean([len(s) for s in final_line['seqs']]))
        if args.debug:
            print utils.pad_lines(treeutils.get_ascii_tree(dendro_tree=tree),
                                  padwidth=12)
        final_line['tree'] = tree.as_string(schema='newick')
    tmp_event = RecombinationEvent(
        glfo)  # I don't want to move the function out of event.py right now
    tmp_event.set_reco_id(
        final_line, irandom=ievent
    )  # not sure that setting <irandom> here actually does anything

    # get target sequences
    target_seqfos = utils.read_fastx('%s/%s_targets.fa' %
                                     (outdir, args.extrastr))
    final_line['target_seqs'] = [tfo['seq'] for tfo in target_seqfos]

    return final_line

Example #2

Show file

File: bcr-phylo-run.py Project: Xiujia-Yang/partis

def parse_bcr_phylo_output(glfo, naive_line, outdir, ievent):
    seqfos = utils.read_fastx(bcr_phylo_fasta_fname(outdir))  # output mutated sequences from bcr-phylo

    assert len(naive_line['unique_ids']) == 1  # enforces that we ran naive-only, 1-leaf partis simulation above
    assert not indelutils.has_indels(naive_line['indelfos'][0])  # would have to handle this below
    if args.debug:
        utils.print_reco_event(naive_line)
    reco_info = collections.OrderedDict()
    for sfo in seqfos:
        mline = copy.deepcopy(naive_line)
        utils.remove_all_implicit_info(mline)
        del mline['tree']
        mline['unique_ids'] = [sfo['name']]
        mline['seqs'] = [sfo['seq']]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        mline['input_seqs'] = [sfo['seq']]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        mline['duplicates'] = [[]]
        reco_info[sfo['name']] = mline
        try:
            utils.add_implicit_info(glfo, mline)
        except:  # TODO not sure if I really want to leave this in long term, but it shouldn't hurt anything (it's crashing on unequal naive/mature sequence lengths, and I need this to track down which event it is) UPDATE: yeah it was just because something crashed in the middle of writing a .fa file
            print 'implicit info adding failed for ievent %d in %s' % (ievent, outdir)
            lines = traceback.format_exception(*sys.exc_info())
            print utils.pad_lines(''.join(lines))  # NOTE this will still crash on the next line if implicit info adding failed
    final_line = utils.synthesize_multi_seq_line_from_reco_info([sfo['name'] for sfo in seqfos], reco_info)
    if args.debug:
        utils.print_reco_event(final_line)

    # extract kd values from pickle file (use a separate script since it requires ete/anaconda to read)
    if args.stype == 'selection':
        kdfname, nwkfname = '%s/kd-vals.csv' % outdir, '%s/simu.nwk' % outdir
        if not utils.output_exists(args, kdfname, outlabel='kd/nwk conversion', offset=4):  # eh, don't really need to check for both kd an nwk file, chances of only one being missing are really small, and it'll just crash when it looks for it a couple lines later
            cmd = './bin/read-bcr-phylo-trees.py --pickle-tree-file %s/%s_lineage_tree.p --kdfile %s --newick-tree-file %s' % (outdir, args.extrastr, kdfname, nwkfname)
            utils.run_ete_script(cmd, ete_path, debug=args.n_procs==1)
        nodefo = {}
        with open(kdfname) as kdfile:
            reader = csv.DictReader(kdfile)
            for line in reader:
                nodefo[line['uid']] = {
                    'kd' : float(line['kd']),
                    'relative_kd' : float(line['relative_kd']),
                    'lambda' : line.get('lambda', None),
                    'target_index' : int(line['target_index']),
                }
        if len(set(nodefo) - set(final_line['unique_ids'])) > 0:  # uids in the kd file but not the <line> (i.e. not in the newick/fasta files) are probably just bcr-phylo discarding internal nodes
            print '        in kd file, but missing from final_line (probably just internal nodes that bcr-phylo wrote to the tree without names): %s' % (set(nodefo) - set(final_line['unique_ids']))
        if len(set(final_line['unique_ids']) - set(nodefo)) > 0:
            print '        in final_line, but missing from kdvals: %s' % ' '.join(set(final_line['unique_ids']) - set(nodefo))
        final_line['affinities'] = [1. / nodefo[u]['kd'] for u in final_line['unique_ids']]
        final_line['relative_affinities'] = [1. / nodefo[u]['relative_kd'] for u in final_line['unique_ids']]
        final_line['lambdas'] = [nodefo[u]['lambda'] for u in final_line['unique_ids']]
        final_line['nearest_target_indices'] = [nodefo[u]['target_index'] for u in final_line['unique_ids']]
        tree = treeutils.get_dendro_tree(treefname=nwkfname)
        tree.scale_edges(1. / numpy.mean([len(s) for s in final_line['seqs']]))
        if args.debug:
            print utils.pad_lines(treeutils.get_ascii_tree(dendro_tree=tree), padwidth=12)
        final_line['tree'] = tree.as_string(schema='newick')
    tmp_event = RecombinationEvent(glfo)  # I don't want to move the function out of event.py right now
    tmp_event.set_reco_id(final_line, irandom=ievent)  # not sure that setting <irandom> here actually does anything

    # get target sequences
    target_seqfos = utils.read_fastx('%s/%s_targets.fa' % (outdir, args.extrastr))
    final_line['target_seqs'] = [tfo['seq'] for tfo in target_seqfos]

    return final_line

Example #3

Show file

def parse_bcr_phylo_output(glfo, naive_line, outdir, ievent):
    seqfos = utils.read_fastx(
        '%s/%s.fasta' %
        (outdir, args.extrastr))  # output mutated sequences from bcr-phylo

    assert len(
        naive_line['unique_ids']
    ) == 1  # enforces that we ran naive-only, 1-leaf partis simulation above
    assert not indelutils.has_indels(
        naive_line['indelfos'][0])  # would have to handle this below
    if args.debug:
        utils.print_reco_event(naive_line)
    reco_info = collections.OrderedDict()
    for sfo in seqfos:
        mline = copy.deepcopy(naive_line)
        utils.remove_all_implicit_info(mline)
        del mline['tree']
        mline['unique_ids'] = [sfo['name']]
        mline['seqs'] = [
            sfo['seq']
        ]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        mline['input_seqs'] = [
            sfo['seq']
        ]  # it's really important to set both the seqs (since they're both already in there from the naive line)
        reco_info[sfo['name']] = mline
        utils.add_implicit_info(glfo, mline)
    final_line = utils.synthesize_multi_seq_line_from_reco_info(
        [sfo['name'] for sfo in seqfos], reco_info)
    if args.debug:
        utils.print_reco_event(final_line)

    # extract kd values from pickle file (use a separate script since it requires ete/anaconda to read)
    if args.stype == 'selection':
        cmd = 'export PATH=%s:$PATH && xvfb-run -a python ./bin/view-trees.py --pickle-tree-file %s/%s_lineage_tree.p --kdfile %s/kd-vals.csv --newick-tree-file %s/simu.nwk' % (
            ete_path, outdir, args.extrastr, outdir, outdir)
        utils.simplerun(cmd, shell=True)
        kdvals = {}
        with open('%s/kd-vals.csv' % outdir) as kdfile:
            reader = csv.DictReader(kdfile)
            for line in reader:
                kdvals[line['uid']] = float(line['kd'])
        if len(
                set(kdvals) - set(final_line['unique_ids'])
        ) > 0:  # uids in the kd file but not the <line> (i.e. not in the newick/fasta files) are probably just bcr-phylo discarding internal nodes
            print '        in kd file, but missing from final_line (probably just internal nodes that bcr-phylo wrote to the tree without names): %s' % (
                set(kdvals) - set(final_line['unique_ids']))
        if len(set(final_line['unique_ids']) - set(kdvals)) > 0:
            print '        in final_line, but missing from kdvals: %s' % ' '.join(
                set(final_line['unique_ids']) - set(kdvals))
        final_line['affinities'] = [
            1. / kdvals[u] for u in final_line['unique_ids']
        ]
        tree = treeutils.get_dendro_tree(treefname='%s/simu.nwk' % outdir)
        if args.debug:
            print utils.pad_lines(treeutils.get_ascii_tree(dendro_tree=tree),
                                  padwidth=12)
        final_line['tree'] = tree.as_string(schema='newick')
    tmp_event = RecombinationEvent(
        glfo)  # I don't want to move the function out of event.py right now
    tmp_event.set_reco_id(
        final_line, irandom=ievent
    )  # not sure that setting <irandom> here actually does anything

    # get target sequences
    target_seqfos = utils.read_fastx('%s/%s_targets.fa' %
                                     (outdir, args.extrastr))
    final_line['target_seqs'] = [tfo['seq'] for tfo in target_seqfos]
    from Bio.Seq import Seq
    final_line['nearest_target_indices'] = []
    aa_targets = [Seq(seq).translate() for seq in final_line['target_seqs']]
    for mseq in final_line['input_seqs']:
        aa_mseq = Seq(mseq).translate()
        aa_hdists = [
            utils.hamming_distance(aa_t, aa_mseq, amino_acid=True)
            for aa_t in aa_targets
        ]
        imin = aa_hdists.index(
            min(aa_hdists)
        )  # NOTE doesn't do anything differently if there's more than one min
        final_line['nearest_target_indices'].append(imin)

    return final_line

Example #4

Show file

File: bcr-phylo-run.py Project: AkselObdrup/partis

    def get_mature_line(sfos,
                        naive_line,
                        glfo,
                        nodefo,
                        dtree,
                        target_sfos,
                        locus=None):
        assert len(
            naive_line['unique_ids']
        ) == 1  # enforces that we ran naive-only, 1-leaf partis simulation above
        assert not indelutils.has_indels(
            naive_line['indelfos'][0])  # would have to handle this below
        if args.debug:
            utils.print_reco_event(naive_line)
        reco_info = collections.OrderedDict()
        for sfo in sfos:
            mline = utils.get_non_implicit_copy(naive_line)
            del mline['tree']
            mline['unique_ids'] = [sfo['name']]
            mline['seqs'] = [sfo['seq']]
            mline['input_seqs'] = [
                sfo['seq']
            ]  # it's really important to set both the seqs (since they're both already in there from the naive line)
            mline['duplicates'] = [[]]
            reco_info[sfo['name']] = mline
            try:
                utils.add_implicit_info(glfo, mline)
            except:  # TODO not sure if I really want to leave this in long term, but it shouldn't hurt anything (it's crashing on unequal naive/mature sequence lengths, and I need this to track down which event it is) UPDATE: yeah it was just because something crashed in the middle of writing a .fa file
                print 'implicit info adding failed for ievent %d in %s' % (
                    ievent, outdir)
                lines = traceback.format_exception(*sys.exc_info())
                print utils.pad_lines(
                    ''.join(lines)
                )  # NOTE this will still crash on the next line if implicit info adding failed
        final_line = utils.synthesize_multi_seq_line_from_reco_info(
            [sfo['name'] for sfo in sfos], reco_info)

        ftree = copy.deepcopy(dtree)
        if locus is not None:

            def ltr(u):
                return u + '-' + locus

            new_nodefo = {}
            for u_old in nodefo:
                new_nodefo[ltr(u_old)] = nodefo[u_old]
            nodefo = new_nodefo
            treeutils.translate_labels(ftree,
                                       [(u, ltr(u))
                                        for u in final_line['unique_ids']])
            final_line['unique_ids'] = [
                ltr(u) for u in final_line['unique_ids']
            ]
            assert len(sfos) == len(final_line['unique_ids'])
            for iseq, sfo in enumerate(sfos):
                naive_id = naive_line['unique_ids'][0]
                assert naive_id.count('-') == 1
                bstr = naive_id.replace('-' + locus, '')
                pids = final_line['paired-uids'][iseq]
                assert len(pids) == 1 and pids[0].find(
                    bstr
                ) == 0 and pids[0].count('-') == 1 and pids[0].split(
                    '-'
                )[1] in utils.loci  # if uid is xxx-igh, paired id shoud be e.g. xxx-igk
                final_line['paired-uids'][iseq] = [
                    p.replace(bstr, sfo['name']) for p in pids
                ]

        if args.debug:
            utils.print_reco_event(final_line)

        # extract kd values from pickle file (use a separate script since it requires ete/anaconda to read)
        if len(
                set(nodefo) - set(final_line['unique_ids'])
        ) > 0:  # uids in the kd file but not the <line> (i.e. not in the newick/fasta files) are probably just bcr-phylo discarding internal nodes
            print '        in kd file, but missing from final_line (probably just internal nodes that bcr-phylo wrote to the tree without names): %s' % (
                set(nodefo) - set(final_line['unique_ids']))
        if len(set(final_line['unique_ids']) - set(nodefo)) > 0:
            print '        in final_line, but missing from kdvals: %s' % ' '.join(
                set(final_line['unique_ids']) - set(nodefo))
        final_line['affinities'] = [
            1. / nodefo[u]['kd'] for u in final_line['unique_ids']
        ]
        final_line['relative_affinities'] = [
            1. / nodefo[u]['relative_kd'] for u in final_line['unique_ids']
        ]
        final_line['lambdas'] = [
            nodefo[u]['lambda'] for u in final_line['unique_ids']
        ]
        final_line['nearest_target_indices'] = [
            nodefo[u]['target_index'] for u in final_line['unique_ids']
        ]
        ftree.scale_edges(1. / numpy.mean([len(s)
                                           for s in final_line['seqs']]))
        if args.debug:
            print utils.pad_lines(treeutils.get_ascii_tree(dendro_tree=ftree),
                                  padwidth=12)
        final_line['tree'] = ftree.as_string(schema='newick')

        tmp_event = RecombinationEvent(
            glfo
        )  # I don't want to move the function out of event.py right now
        tmp_event.set_reco_id(
            final_line, irandom=ievent
        )  # not sure that setting <irandom> here actually does anything
        final_line['target_seqs'] = [tfo['seq'] for tfo in target_sfos]
        return final_line