Beispiel #1
0
    def samples_seq(self):
        '''Sequencing samples that refer to this patient sample.

        On the first call the sequenced samples are loaded, restricted to the
        rows whose 'patient sample' column equals self.name, and the selection
        is cached in self._sequenced_samples. Later calls reuse the cache.

        Returns:
            A copy of the cached selection, so that callers can modify the
            result without touching the cache.
        '''
        if self._sequenced_samples is None:
            # Imported here rather than at module level, so the loader is only
            # pulled in when the cache actually has to be filled.
            # TODO: optimize this call
            from hivwholeseq.sequencing.samples import load_samples_sequenced as lss
            samples_seq = lss()
            refers_to_self = samples_seq['patient sample'] == self.name
            self._sequenced_samples = samples_seq.loc[refers_to_self]

        return self._sequenced_samples.copy()
Beispiel #2
0
    def discard_nonsequenced_samples(self):
        '''Keep only the samples that have been sequenced already.

        The names listed in the 'patient sample' column of the sequenced
        samples are collected, the placeholder string 'nan' is dropped, and
        self.samples is replaced by the subset of its rows whose index appears
        among those names.
        '''
        from hivwholeseq.sequencing.samples import load_samples_sequenced as lss

        sequenced = lss()
        sequenced_names = set(sequenced.loc[:, 'patient sample'])
        # 'nan' is not the name of a patient sample, so never match on it
        sequenced_names.discard('nan')

        # NOTE: an earlier version also attached the matching sequencing rows
        # to every sample in a 'samples seq' column. That code was disabled as
        # buggy (FIXME: unclear why it lived here in the first place).
        is_sequenced = self.samples.index.isin(sequenced_names)
        self.samples = self.samples.loc[is_sequenced]
Beispiel #3
0
    def discard_nonsequenced_samples(self):
        '''Keep only the samples that have been sequenced already.

        The names listed in the 'patient sample' column of the sequenced
        samples are collected, the placeholder string 'nan' is dropped, and
        self.samples is replaced by the subset of its rows whose index appears
        among those names.
        '''
        from hivwholeseq.sequencing.samples import load_samples_sequenced as lss

        sequenced = lss()
        sequenced_names = set(sequenced.loc[:, 'patient sample'])
        # 'nan' is not the name of a patient sample, so never match on it
        sequenced_names.discard('nan')

        # NOTE: an earlier version also attached the matching sequencing rows
        # to every sample in a 'samples seq' column. That code was disabled as
        # buggy (FIXME: unclear why it lived here in the first place).
        is_sequenced = self.samples.index.isin(sequenced_names)
        self.samples = self.samples.loc[is_sequenced]
    parser = argparse.ArgumentParser(description="make figure")
    parser.add_argument('--redo', action='store_true', help='recalculate data')
    params = parser.parse_args()

    fragment = 'F1'
    VERBOSE = 2
    username = os.path.split(os.getenv('HOME'))[-1]

    foldername = get_figure_folder(username, 'first')
    fn_data = foldername+'data/'
    mkdirs(fn_data)
    fn_data = fn_data + 'minor_alleles_example.pickle'

    if not os.path.isfile(fn_data) or params.redo:
        samplename = 'NL4-3'
        sample = lss(samplename)
        counts = sample.get_allele_counts(fragment, merge_read_types=True)
        data = compress_data(counts, samplename, fragment)

        samplename = '27134'
        sample = lssp(samplename)
        counts = sample.get_allele_counts(fragment, merge_read_types=True)
        data = compress_data(counts, samplename, fragment, data=data)


        store_data(data, fn_data)
    else:
        data = load_data(fn_data)

    plot_minor_allele_example(data,
                              VERBOSE=VERBOSE,
import sys
import os

from hivwholeseq.utils.generic import mkdirs
from hivwholeseq.patients.samples import itersample
from hivwholeseq.sequencing.samples import load_samples_sequenced as lss
from hivwholeseq.patients.samples import load_samples_sequenced as lssp
from hivwholeseq.sequencing.filenames import get_sample_foldername



# Script
if __name__ == '__main__':
    # For every patient sample, make a 'samples_sequencing/' subfolder inside
    # the sample's folder and fill it with one symlink per sequencing sample
    # that refers to this patient sample. Each link points at the folder
    # returned by get_sample_foldername for that sequencing sample.

    samples_pat = lssp()
    samples_seq = lss()

    for samplename, sample in itersample(samples_pat):
        root_foldername = sample.get_foldername()+'samples_sequencing/'
        mkdirs(root_foldername)

        # Select the sequencing samples of this patient sample with a single
        # boolean mask instead of testing every row in a Python-level loop.
        is_this_sample = samples_seq['patient sample'] == samplename
        for samplenameseq in samples_seq.loc[is_this_sample].index:
            src_folder = get_sample_foldername(samplenameseq)
            dst_folder = root_foldername+samplenameseq

            # Links left over from a previous run are kept as they are
            if os.path.islink(dst_folder):
                print('Exists: %s' % (dst_folder,))
                continue

            os.symlink(src_folder, dst_folder)
            # Single-argument print: same output under Python 2 and 3
            print('Symlink: %s %s' % (src_folder, dst_folder))
Beispiel #6
0
                        help='Execute the script in parallel on the cluster')

    args = parser.parse_args()
    seq_runs = args.runs
    adaIDs = args.adaIDs
    use_pats = args.use_pats
    use_interactive = args.interactive
    detail = args.detail
    submit = args.submit

    if submit:
        fork_self(seq_runs, adaIDs=adaIDs, pats=use_pats, detail=detail)
        sys.exit()

    samples_pat = lssp(include_wrong=True)
    samples = lss()

    samples = samples.loc[samples['seq run'].isin(seq_runs)]
    if adaIDs is not None:
        samples = samples.loc[samples.adapter.isin(adaIDs)]

    if len(seq_runs) >= 2:
        samples.sort(columns=['patient sample', 'seq run'], inplace=True)

    for isa, (samplename, sample) in enumerate(samples.iterrows()):
        sample = SampleSeq(sample)
        print sample.name, 'seq:', sample['seq run'], sample.adapter,
        if sample['patient sample'] == 'nan':
            print 'not a patient sample',
            if use_pats:
                print '(skip)'
                        help='Number of reads analyzed')
    parser.add_argument('--verbose', type=int, default=0,
                        help='Verbosity level [0-3]')
    parser.add_argument('--minor-allele', action='store_true', dest='minor_allele',
                        help='Plot also minor allele')

    args = parser.parse_args()
    samplenames = args.samples
    seq_runs = args.runs
    adaIDs = args.adaIDs
    fragments = args.fragments
    maxreads = args.maxreads
    VERBOSE = args.verbose
    use_minor_allele = args.minor_allele

    samples = lss()
    if samplenames is not None:
        samples = samples.loc[samples.index.isin(samplenames)]

    else:
        ind = np.zeros(len(samples), bool)
        for seq_run in seq_runs:
            dataset = load_sequencing_run(seq_run)
            data_folder = dataset.folder
            samples_run = dataset.samples
            # If the script is called with no adaID, iterate over all
            if adaIDs is not None:
                samples_run = samples_run.loc[samples_run.adapter.isin(adaIDs)]

            ind |= samples.index.isin(samples_run.index)
Beispiel #8
0
    parser = argparse.ArgumentParser(description="make figure")
    parser.add_argument('--redo', action='store_true', help='recalculate data')
    params = parser.parse_args()

    fragment = 'F1'
    VERBOSE = 2
    username = os.path.split(os.getenv('HOME'))[-1]

    foldername = get_figure_folder(username, 'first')
    fn_data = foldername + 'data/'
    mkdirs(fn_data)
    fn_data = fn_data + 'minor_alleles_example.pickle'

    if not os.path.isfile(fn_data) or params.redo:
        samplename = 'NL4-3'
        sample = lss(samplename)
        counts = sample.get_allele_counts(fragment, merge_read_types=True)
        data = compress_data(counts, samplename, fragment)

        samplename = '27134'
        sample = lssp(samplename)
        counts = sample.get_allele_counts(fragment, merge_read_types=True)
        data = compress_data(counts, samplename, fragment, data=data)

        store_data(data, fn_data)
    else:
        data = load_data(fn_data)

    plot_minor_allele_example(
        data,
        VERBOSE=VERBOSE,