def test_load_find_library_analysis_file(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    cwd_files = list(models.find_library_analysis_file(mm10, '*.coverage'))
    self.assertGreaterEqual(len(cwd_files), 1)
    for f in cwd_files:
        self.assertTrue(isinstance(f, models.AnalysisFile))

    with TemporaryDirectory() as analysis_dir:
        with chdir(analysis_dir):
            mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
            tmpname = os.path.join(analysis_dir, 'library-mm10-se.tsv')
            shutil.copy(mm10tsv, tmpname)
            analysis_root = os.path.dirname(mm10tsv)
            mm10 = models.load_library_tables([tmpname],
                                              analysis_root=analysis_root)
            abs_files = list(models.find_library_analysis_file(mm10, '*.coverage'))
            self.assertGreaterEqual(len(abs_files), 1)
            for f in abs_files:
                self.assertTrue(isinstance(f, models.AnalysisFile))

    self.assertEqual(len(cwd_files), len(abs_files))
    self.assertEqual(cwd_files[0].filename, abs_files[0].filename)
def test_load_library(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    hg38tsv = resource_filename(__name__, 'library-hg38-se.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    self.assertEqual(len(mm10), count_valid_records(mm10tsv))
    hg38 = models.load_library_tables([hg38tsv])
    both = models.load_library_tables([mm10tsv, hg38tsv])
    self.assertEqual(len(mm10) + len(hg38), len(both))
def test_load_library(self): mm10tsv = resource_filename(__name__, "library-mm10-se.tsv") hg38tsv = resource_filename(__name__, "library-hg38-se.tsv") mm10 = models.load_library_tables([mm10tsv]) self.assertEqual(len(mm10), count_valid_records(mm10tsv)) hg38 = models.load_library_tables([hg38tsv]) both = models.load_library_tables([mm10tsv, hg38tsv]) self.assertEqual(len(mm10) + len(hg38), len(both))
def test_load_library_analysis_root(self):
    with TemporaryDirectory() as analysis_dir:
        with chdir(analysis_dir):
            mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
            tmpname = os.path.join(analysis_dir, 'library-mm10-se.tsv')
            shutil.copy(mm10tsv, tmpname)
            analysis_root = os.path.dirname(mm10tsv)
            mm10 = models.load_library_tables([mm10tsv])
            mm10tmp = models.load_library_tables([tmpname],
                                                 analysis_root=analysis_root)
            for i in mm10['analysis_dir'].index:
                self.assertEqual(mm10['analysis_dir'][i],
                                 mm10tmp['analysis_dir'][i])
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    if not validate_library_file_existance(args):
        parser.error('Fix incorrect library file names')

    library_filenames = args.libraries
    if len(library_filenames) == 0:
        parser.error('Need library information table')

    libraries = load_library_tables(library_filenames, sep)

    custom_tracks = []
    for library_id, library in libraries.iterrows():
        if args.bigwig:
            custom_tracks.extend(
                make_bigwig_custom_tracks(library, args.web_root, args.root))
        if args.bam:
            custom_tracks.append(
                make_bam_custom_track(library, args.web_root, args.root))

    print(os.linesep.join(custom_tracks))
def test_load_all_star_counts(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    scores = models.load_all_star_counts(mm10, '+')
    self.assertEqual(scores.shape, (11, 2))
    self.assertEqual(scores.index.name, 'gene_id')
    self.assertEqual(list(scores.columns), ['12304', '12305'])
def main(cmdline=None):
    parser = ArgumentParser()
    parser.add_argument('-l', '--library', required=True, action='append',
                        help="library table to load")
    parser.add_argument('-o', '--output',
                        help='filename to write report to')
    args = parser.parse_args(cmdline)

    libraries = load_library_tables(args.library)

    metrics = []
    for library_id, library in libraries.iterrows():
        genome_triple = genome_name_from_library(library)
        filename = library.analysis_name + '-' + genome_triple + '_picard_markdup.metrics'
        pathname = Path(library.analysis_dir) / filename
        if pathname.exists():
            picard_metric = parse_picard_metric(pathname, library_id=library_id)
            metrics.append(picard_metric)
        else:
            print('{} is missing. Skipping'.format(pathname))

    metrics = pandas.DataFrame(metrics)
    metrics.set_index('LIBRARY', inplace=True)
    if args.output:
        metrics.to_csv(args.output, sep='\t')
    else:
        print(metrics)
def main(cmdline=None): parser = make_parser() args = parser.parse_args(cmdline) configure_logging(args) sep = get_seperator(args.sep) experiments = models.load_experiments(args.experiments, sep=sep) libraries = models.load_library_tables(args.libraries, sep=sep) output_sep = get_seperator(args.output_format) output_extension = {"TAB": ".tsv", ",": ".csv"}[args.output_format] if args.transcriptome: # isoforms load_quantifications = madqc.load_transcriptome_quantifications quantification_extension = "_isoform_" + args.quantification + output_extension else: # genes load_quantifications = madqc.load_genomic_quantifications quantification_extension = "_gene_" + args.quantification + output_extension for name in experiments: filename = name + quantification_extension replicates = experiments[name] logger.info("%s %s: %s", name, args.quantification, ",".join(replicates)) quantifications = load_quantifications(replicates, libraries, args.quantification) quantifications.to_csv(filename, sep=output_sep)
def test_read_line_from_stream(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    with open(mm10tsv) as instream:
        lines = list(models.read_line_from_stream(instream))
    mm10 = models.load_library_tables([mm10tsv])
    # add one to the mm10 dataframe length because the header is not counted in len()
    self.assertEqual(len(lines), len(mm10) + 1)
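# A hedged sketch of the generator exercised by the test above; the real
# models.read_line_from_stream is defined elsewhere. The test only requires
# that it yield one entry per line of the stream, header included.
def read_line_from_stream(stream):
    for line in stream:
        # strip the trailing newline so callers see bare records
        yield line.rstrip('\n')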
def create_quantification_cache(
        library_table, experiment_name, replicates, quantification_name,
        sep='\t'):
    score_filename = models.make_correlation_filename(experiment_name)
    quant_filename = models.make_quantification_filename(experiment_name,
                                                         quantification_name)

    libraries = models.load_library_tables([library_table], sep=sep)
    quantifications = load_genomic_quantifications(
        replicates, libraries, quantification_name)

    if os.path.exists(quant_filename):
        os.unlink(quant_filename)
    store = pandas.HDFStore(quant_filename, complevel=9, complib='blosc')
    store.append('quantifications', quantifications)
    store.close()

    scores = compute_all_vs_all_scores(quantifications)
    if os.path.exists(score_filename):
        os.unlink(score_filename)
    store = pandas.HDFStore(score_filename)
    for key in scores:
        store.append(key, scores[key])
    store.close()
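# A minimal sketch of reading the caches written above back into memory,
# assuming only pandas' standard HDF5 API and the filename helpers already
# used in create_quantification_cache. The function name is hypothetical.
def load_quantification_cache(experiment_name, quantification_name):
    quant_filename = models.make_quantification_filename(experiment_name,
                                                         quantification_name)
    score_filename = models.make_correlation_filename(experiment_name)
    # read_hdf opens the store, fetches the named table, and closes it again
    quantifications = pandas.read_hdf(quant_filename, 'quantifications')
    with pandas.HDFStore(score_filename) as store:
        # one table per score matrix, keyed as they were appended above
        scores = {key.lstrip('/'): store[key] for key in store.keys()}
    return quantifications, scores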
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif args.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.WARN)
    sep = get_seperator(args.sep)

    if args.experiments:
        experiments = models.load_experiments(args.experiments, sep=sep,
                                              analysis_root=args.root)
    else:
        if args.experiment_name is None:
            parser.error(
                'Please provide an experiment name. (Used as filename)')
        if len(args.replicates) == 0:
            parser.error(
                'Please provide list of replicates or experiment table')
        # build a one-row frame so both branches support .iterrows() below;
        # a plain dict would break the loop. This assumes load_experiments
        # also returns a DataFrame with a 'replicates' column.
        experiments = pandas.DataFrame(
            {'replicates': [args.replicates]},
            index=[args.experiment_name])

    if args.libraries is None:
        parser.error('Please provide library information tables')
    libraries = models.load_library_tables(args.libraries, sep=sep)

    for i, experiment in experiments.iterrows():
        logging.info('Processing: %s', experiment.name)
        create_quantification_cache(
            experiment, libraries, args.quantification, args.model, sep)
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    if args.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.ERROR)

    experiments = models.load_experiments(args.experiments)
    libraries = models.load_library_tables(args.libraries)
    coverage = models.load_all_coverage(libraries)

    if args.all_experiments:
        make_combined_median_normalized_summary(
            experiments, coverage, args.output_format, args.bare)
    elif args.experiment_median_summary:
        make_per_experiment_median_normalized_summary(
            experiments, coverage, args.output_format, args.bare)
    elif args.by_experiment:
        make_by_experiment_median_summary(
            experiments, coverage, args.output_format, args.bare)
    elif args.combined_median_summary:
        make_combined_experiment_median_summary(
            experiments, coverage, args.output_format, args.bare)
    else:
        make_experiment_by_library_coverage_plots(
            experiments, coverage, args.output_format, args.bare)
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    if not validate_path_args(args):
        parser.error('Please set required parameters')

    if not (validate_library_file_existance(args) and
            validate_experiment_file_existance(args)):
        parser.error('Fix path to files')

    sep = get_seperator(args.sep)
    library_filenames = args.libraries
    library_filenames.extend(args.other_libraries)
    libraries = models.load_library_tables(library_filenames, sep)
    read1 = dict(find_fastqs(libraries, 'read_1'))
    if 'read_2' in libraries.columns:
        read2 = dict(find_fastqs(libraries, 'read_2'))
    else:
        read2 = {}

    dags = generate_star_rsem_analysis(args, libraries, read1, read2)
    generate_combined_analysis(args, dags)

    return 0
def test_reference_prefix(self):
    spurtsv = resource_filename(__name__, 'library-spur-se.tsv')
    spur = models.load_library_tables([spurtsv])
    self.assertEqual(make_dag.get_reference_prefix(spur, '12304'), 'scaffold')
    self.assertEqual(make_dag.get_reference_prefix(spur, '12307'), 'chr')
def main(cmdline=None):
    parser = ArgumentParser()
    parser.add_argument('-n', '--experiment-name', required=True,
                        help='Experiment name to select')
    add_metadata_arguments(parser)
    add_debug_arguments(parser)
    args = parser.parse_args(cmdline)
    configure_logging(args)

    header_printed = False
    libraries = load_library_tables(args.libraries)
    experiments = load_experiments(args.experiments)
    replicates = experiments.loc[args.experiment_name, 'replicates']
    for i, (library_id, library) in enumerate(libraries.loc[replicates].iterrows()):
        filename = find_library_bam_file(library)
        LOGGER.info(' Reading %s %d/%d', filename, i + 1, len(replicates))

        mode = get_mode(filename, 'r')
        with pysam.AlignmentFile(filename, mode) as alignment:
            if not header_printed:
                print(str(alignment.header))
                header_printed = True
            for read in alignment:
                print(read.to_string())
def load_asof_run17_libraries():
    library_files = [os.path.expanduser(x.strip())
                     for x in ASOF_RUN17_library_files.split('\n')]
    libraries = models.load_library_tables(library_files)
    name = libraries.index.name
    libraries.index = [x.replace('_mm10', '').replace('_clean', '')
                       for x in libraries.index]
    libraries.index.name = name
    return libraries
def load_asof_run17_libraries():
    library_files = list(split_files_text(ASOF_RUN17_library_files))
    libraries = models.load_library_tables(library_files)
    name = libraries.index.name
    libraries.index = [sanitize_library_name(x) for x in libraries.index]
    libraries.index.name = name
    return libraries
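# Hedged sketches of the two helpers used above. The real split_files_text
# and sanitize_library_name are defined elsewhere; these are assumptions,
# reconstructed from the inline logic of the earlier version of
# load_asof_run17_libraries.
def split_files_text(files_text):
    # yield one expanded path per non-blank line of the embedded file list
    for line in files_text.split('\n'):
        line = line.strip()
        if line:
            yield os.path.expanduser(line)


def sanitize_library_name(library_id):
    # drop the genome and cleaning suffixes so ids match across tables
    return library_id.replace('_mm10', '').replace('_clean', '')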
def setUp(self):
    self.mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    self.mm10 = models.load_library_tables([self.mm10tsv])
    self.female = pandas.DataFrame()
    self.female.index.name = 'female'
    self.male = pandas.DataFrame()
    self.male.index.name = 'male'
def load_920cell_library_table():
    clusters = pandas.DataFrame(
        find_bigwigs.read_peng_20180710_cluster_memberships())
    asof_run17 = generate_combined_transcript_C1.ASOF_RUN17_library_files.split('\n')
    libraries = [os.path.expanduser(x.strip()) for x in asof_run17]
    library_df = load_library_tables(libraries)
    library_df = library_df.reindex(clusters['cell_id'])
    return library_df
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)

    experiments = load_experiments(args.experiments)
    libraries = load_library_tables(args.libraries)

    plot = MeanGeneCoverage(experiments, libraries)
    plot.use_experiment(args.use_experiment)
    return plot
def build_hash_tree(library_filename):
    table = load_library_tables([library_filename])
    hashes = {}
    for library_id, row in table.iterrows():
        analysis_dir = row.analysis_dir
        name = row.analysis_name + '-' + genome_name_from_library(row) + '_genome.bam'
        alignment = os.path.join(analysis_dir, name)
        hashes[library_id] = hash_alignments(alignment)
    return hashes
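# A minimal sketch of comparing two of the hash trees built above, for
# example to check that a rerun produced identical alignments. The function
# name is hypothetical; only build_hash_tree from this file is assumed.
def diff_hash_trees(old_filename, new_filename):
    old = build_hash_tree(old_filename)
    new = build_hash_tree(new_filename)
    # report library ids whose alignment hashes disagree or are missing
    changed = {library_id
               for library_id in set(old) | set(new)
               if old.get(library_id) != new.get(library_id)}
    return sorted(changed)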
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    if args.debug:
        logging.basicConfig(level=logging.DEBUG)
    else:
        logging.basicConfig(level=logging.INFO)

    libraries = []
    if args.libraries:
        libraries = load_library_tables(args.libraries, analysis_root=args.root)
        LOGGER.info("loaded %d libraries", len(libraries))

    if len(libraries) == 0 and len(args.gene_list) == 0:
        parser.error('Please specify libraries to process')

    with open(args.gtf, 'rt') as stream:
        gene_types = readGeneTypes(stream)
    LOGGER.info("Loaded %d gene types", len(gene_types))

    coverage_by_type = {}
    counts_by_type = {}
    for gene_coverage_table in load_all_gene_coverage(
            libraries, args.gene_list, args.gene_normalization):
        coverage, counts = sum_gene_coverage_by_type(gene_types,
                                                     gene_coverage_table)
        coverage_by_type[coverage.name] = coverage
        counts_by_type[coverage.name] = counts

    LOGGER.info('Preparing plot class')
    plot = GeneCoverageDetail(coverage_by_type, counts_by_type,
                              args.gene_normalization)

    if args.save:
        for library_id in plot:
            # avoid names that cause problems for file systems
            assert not library_id.startswith('..')
            assert '/' not in library_id
            assert '\\' not in library_id
            filename = '{}_gene_coverage_detail.html'.format(library_id)
            pathname = os.path.join(args.output_dir, filename)
            LOGGER.info("Saving plot for %s to %s", library_id, pathname)
            save(
                plot.make_plot(library_id),
                pathname,
                resources=resources.CDN,
                title=library_id,
            )

    return plot
def main(cmdline=None):
    parser = ArgumentParser()
    parser.add_argument('-o', '--output', help='output directory')
    parser.add_argument('--mode', default=None, choices=[
        'customtrack',
        'trackhub',
        'merge_paper_wiggles',
        'paper_median_coverage',
        'check_bedgraphs',
        'localize_tsvs',
        'paper_as_single_experiment_tsv',
        'paper_as_cluster_experiment_tsv',
    ])
    args = parser.parse_args(cmdline)

    experiment_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_experiment_files.split()
    ]
    library_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_library_files.split()
    ]
    experiments = models.load_experiments(experiment_files)
    libraries = models.load_library_tables(library_files)
    to_include = read_peng_20180710_cluster_memberships()
    #print('{} cells to include'.format(len(to_include)))

    if args.mode == 'customtrack':
        make_custom_tracks()
    elif args.mode == 'trackhub':
        make_trackhub()
    elif args.mode == 'merge_paper_wiggles':
        merge_paper_wiggles(to_include, libraries)
    elif args.mode == 'paper_median_coverage':
        make_paper_median_coverage(to_include, libraries, args.output)
    elif args.mode == 'check_bedgraphs':
        check_bedgraphs(to_include, libraries)
    elif args.mode == 'localize_tsvs':
        localize_tsvs(experiments, libraries, args.output)
    elif args.mode == 'paper_as_single_experiment_tsv':
        paper920_as_single_experiment_tsv(to_include, args.output)
    elif args.mode == 'paper_as_cluster_experiment_tsv':
        paper920_as_cluster_experiment_tsv(to_include, args.output)
    else:
        parser.error('Did you want to pick an operation mode?')
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)

    experiments = load_experiments(args.experiments)
    libraries = load_library_tables(args.libraries)

    if args.use_experiment:
        try:
            experiments = experiments.loc[[args.use_experiment]]
        except KeyError:
            print('{} was not found in {}'.format(
                args.use_experiment, ', '.join(list(experiments.index))))
            return None

    plot = DistributionPlot(experiments, libraries)
    return plot
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    output_sep = get_seperator(args.output_format)
    output_extension = {
        'TAB': '.tsv',
        ',': '.csv',
    }[args.output_format]

    if args.add_names:
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        else:
            logger.info('Loading GTF Cache %s', args.gtf_cache)
            annotation = models.load_gtf_cache(args.gtf_cache)
    else:
        annotation = None

    if args.transcriptome:
        # isoforms
        load_quantifications = madqc.load_transcriptome_quantifications
        lookup_ids = models.lookup_gene_name_by_transcript_id
        quantification_extension = '_isoform_' + args.quantification + output_extension
    else:
        # genes
        load_quantifications = madqc.load_genomic_quantifications
        lookup_ids = models.lookup_gene_name_by_gene_id
        quantification_extension = '_gene_' + args.quantification + output_extension

    for name in experiments:
        filename = name + quantification_extension
        replicates = experiments[name]
        logger.info("%s %s: %s", name, args.quantification, ','.join(replicates))
        quantifications = load_quantifications(
            replicates, libraries, args.quantification)
        if annotation is not None:
            quantifications = lookup_ids(annotation, quantifications)
        quantifications.to_csv(filename, sep=output_sep)
def load_filtered_transcripts():
    sep = '\t'
    cache_file = os.path.expanduser(
        '~sau/genomes/mm10-M4-male/mm10-M4-male.h5')
    #annotation = models.load_gtf_cache(cache_file)
    annotation = None
    loader = IsoformRsemLoader('FPKM', annotation)
    index_name = 'transcript_id'
    # loader = GeneRsemLoader(args.quantification, annotation)
    #index_name = 'gene_id'
    to_include = generate_to_include_asof_run17()[1:]

    experiment_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_experiment_files.split()
    ]
    library_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_library_files.split()
    ]

    quantifications = []
    for e, l in zip(experiment_files, library_files):
        print('loading', e)
        experiments = models.load_experiments([e], sep=sep)
        libraries = models.load_library_tables([l], sep=sep)
        for i, experiment in experiments.iterrows():
            print(experiment)
            quantification = loader.load(experiment, libraries)
            quantification.columns = list(
                filter_columns(quantification.columns))
            quantifications.append(quantification)

    sheets = pandas.concat(quantifications, axis=1)
    print('all', sheets.shape)
    # sheets.to_csv('C1_mouse_combined_transcript_asof_run17_unfiltred.tsv', sep='\t')
    # was crashing because of _mm10 suffix
    filtered = sheets[to_include]
    print('filtered', filtered.shape)
    return filtered
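# A hedged sketch of the filter_columns helper used above; the real one is
# defined elsewhere. The "_mm10 suffix" comment and the suffix stripping in
# load_asof_run17_libraries suggest it normalizes column names like this.
def filter_columns(columns):
    for column in columns:
        yield column.replace('_mm10', '').replace('_clean', '')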
def main(cmdline=None):
    parser = ArgumentParser()
    parser.add_argument('-o', '--output-dir')
    args = parser.parse_args(cmdline)

    experiment_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_experiment_files.split()
    ]
    library_files = [
        os.path.expanduser(x.strip())
        for x in ASOF_RUN17_library_files.split()
    ]
    experiments = load_experiments(experiment_files)
    libraries = load_library_tables(library_files)

    #link_rsem(libraries, args.output_dir)
    link_genome_bams(libraries, args.output_dir)
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    if not validate_args(args):
        parser.error("Please set required parameters")

    sep = get_seperator(args.sep)
    libraries = models.load_library_tables(args.libraries, sep)
    read1 = dict(find_fastqs(libraries, "read_1"))
    if "read_2" in libraries.columns:
        read2 = dict(find_fastqs(libraries, "read_2"))
    else:
        read2 = {}

    dag = generate_star_rsem_analysis(args, libraries, read1, read2)
    print(dag)

    return 0
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)

    experiments = load_experiments(args.experiments)
    libraries = load_library_tables(args.libraries)

    if args.use_experiment:
        try:
            experiments = experiments.loc[[args.use_experiment]]
        except KeyError:
            logger.error('{} was not found in {}'.format(
                args.use_experiment, ', '.join(list(experiments.index))))
            return None

    if len(args.gene_type_filter) > 0:
        logger.info('Limiting to the following gene types {}'.format(
            ','.join(args.gene_type_filter)))
    else:
        logger.info('Using all gene types')

    # ids will be None if args.gene_list_filter is None
    ids = load_gene_id_list(args.gene_list_filter)

    plot = GenesDetectedPlot(
        experiments,
        libraries,
        args.genome_dir,
        args.quantification,
        gene_type_filter=args.gene_type_filter,
        gene_list_filter=ids,
    )

    # only save a static HTML copy when this module is executed directly as
    # a script; when imported, the caller just gets the plot object back
    if __name__ == '__main__':
        curdoc().add_root(plot.static_layout())
        save(curdoc(), args.output, title=plot.title)

    return plot
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    gtf_cache = None
    if args.add_names:
        if args.genome_dir is None:
            parser.error(
                'genome-dir is needed to add names to the quantification file')
        else:
            gtf_cache = GTFCache(libraries, args.genome_dir)

    if len(args.quantification) > 0:
        quantification_list = args.quantification
    else:
        quantification_list = ['FPKM']

    if args.transcriptome:
        # isoforms
        RsemLoader = IsoformRsemLoader
    else:
        # genes
        RsemLoader = GeneRsemLoader

    for quantification in quantification_list:
        logger.info('Building expression matrix for %s', quantification)
        for i, experiment in experiments.iterrows():
            loader = RsemLoader(quantification, gtf_cache)
            matrix = loader.load(experiment, libraries)
            loader.save(matrix, args.output_format)
def main(cmdline=None):
    parser = make_parser()
    args = parser.parse_args(cmdline)
    configure_logging(args)
    sep = get_seperator(args.sep)

    experiments = models.load_experiments(args.experiments, sep=sep)
    libraries = models.load_library_tables(args.libraries, sep=sep)

    if args.add_names:
        if args.gtf_cache is None:
            parser.error('GTF-cache is needed to add names to the quantification file')
        else:
            logger.info('Loading GTF Cache %s', args.gtf_cache)
            annotation = models.load_gtf_cache(args.gtf_cache)
    else:
        annotation = None

    loader = StarLoader(args.strand, annotation)
    for i, experiment in experiments.iterrows():
        quantification = loader.load(experiment, libraries)
        loader.save(quantification, args.output_format)
def test_load_stranded_library(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-stranded.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    expected = ['forward', 'reverse', 'unstranded',
                'forward', 'reverse', 'unstranded']
    for strand, (library_id, row) in zip(expected, mm10.iterrows()):
        self.assertEqual(strand, row.stranded)
def test_genome_name_from_library_series(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    self.assertEqual(models.genome_name_from_library(mm10.loc['12304']),
                     'mm10-M4-female')
    self.assertEqual(models.genome_name_from_library(mm10.loc['12309']),
                     'mm10-M4-male')
def test_load_all_distribution(self):
    mm10tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    mm10 = models.load_library_tables([mm10tsv])
    distribution = models.load_all_distribution(mm10)
    self.assertEqual(distribution.shape, (1, 3))
    self.assertEqual(distribution.index[0], '12304')
def test_reference_prefix(self): spurtsv = resource_filename(__name__, "library-spur-se.tsv") spur = models.load_library_tables([spurtsv]) self.assertEqual(make_dag.get_reference_prefix(spur, "12304"), "scaffold") self.assertEqual(make_dag.get_reference_prefix(spur, "12307"), "chr")
def setUp(self):
    self.exp_tsv = resource_filename(__name__, 'experiments-mm10.tsv')
    self.lib_tsv = resource_filename(__name__, 'library-mm10-se.tsv')
    self.libraries = models.load_library_tables([self.lib_tsv])
    self.experiments = models.load_experiments([self.exp_tsv])
def test_reference_prefix_missing(self):
    mm10tsv = resource_filename(__name__, "library-mm10-se.tsv")
    mm10 = models.load_library_tables([mm10tsv])
    self.assertEqual(make_dag.get_reference_prefix(mm10, "12304"), "chr")