Exemple #1
0
    def parse_metadata(self, data_dir):
        """Group metadata rows by workflow key and yield rendered JSON configs.

        Every row of ``self.records`` gets a workflow key made of its
        lower-cased read type, with ``-with-control`` appended when the row
        carries a usable ``control`` value.

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if samples sharing a workflow key name more than
            one genome while ``separate_jsons`` is not set.
        """
        def sample_sort_key(info):
            # Dicts cannot be ordered on Python 3, so compare their sorted
            # (key, value) pairs instead.
            return sorted(info.items())

        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for _, r in self.records.iterrows():
            read_type = r['paired-end or single-end'].lower()
            sample_info = {'treatment': r['name']}
            wf_key = read_type
            # An undetermined control is read back as a (truthy) float NaN;
            # isinstance also covers float subclasses such as numpy.float64.
            has_control = ('control' in r.keys() and r['control']
                           and not isinstance(r['control'], float))
            if has_control:
                sample_info['control'] = r['control']
                wf_key += '-with-control'
            # Overwritten on every row: all rows with the same key have the
            # same read type and the same set of sample fields.
            wf_conf_dict[wf_key] = {'rt': read_type,
                                    'st': list(sample_info)}
            genome = consts.GENOME  # Default genome
            if 'genome' in r.keys():
                genome = r['genome']

            samples_dict[wf_key].append([sample_info, genome])

        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            if self.obj.separate_jsons:
                ordered = sorted(samples_genomes,
                                 key=lambda s: (sample_sort_key(s[0]), s[1]))
                for si, (sample, genome) in enumerate(ordered):
                    ref_dataset = consts.ReferenceDataset(genome)
                    self.update_paths(ref_dataset)
                    yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir, self.experiment_type), wf_key, si

            else:
                samples_list, genomes_list = zip(*samples_genomes)
                if len(set(genomes_list)) > 1:
                    raise Exception('More than one genome specified (%s). Please create a different metadata file'
                                    ' per genome or provide a sjdb and specify the --separate-jsons argument' %
                                    ', '.join(set(genomes_list)))
                ref_dataset = consts.ReferenceDataset(genomes_list[0])
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key],
                                       sorted(samples_list, key=sample_sort_key),
                                       data_dir, self.experiment_type), wf_key, None
Exemple #2
0
    def parse_metadata(self, data_dir):
        """Group metadata records by workflow key and yield rendered JSON configs.

        The workflow key joins the lower-cased read type and the strand
        specificity with ``-``, plus ``with-sjdb`` when
        ``self.skip_star_2pass`` is truthy.

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if samples sharing a workflow key name more than
            one genome while ``separate_jsons`` is not set.
        """
        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for r in self.records:
            read_type = r['Paired-end or single-end'].lower()
            sample_name = r['Name']
            strand_specific = r['Strand specificity']
            # Fall back to the default genome when the column is absent.
            genome = r['Genome'] if 'Genome' in r.keys() else consts.GENOME
            kws = [read_type, strand_specific]
            if self.skip_star_2pass:
                kws.append('with-sjdb')
            wf_key = '-'.join(kws)
            # Overwritten per record, so 'sn' ends up as the last sample name
            # seen for this key.
            wf_conf_dict[wf_key] = {'rt': read_type, 'sn': sample_name}
            samples_dict[wf_key].append([sample_name, genome])

        # One consistently spelled loop variable: the original bound
        # ``samples_gemomes`` here but read ``samples_genomes`` further down.
        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            if self.obj.separate_jsons:
                for si, (sample, genome) in enumerate(sorted(samples_genomes)):
                    ref_dataset = consts.ReferenceDataset(genome, read_length=self.read_length)
                    self.update_paths(ref_dataset)

                    yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir), wf_key, si
            else:
                samples_list = [s[0] for s in samples_genomes]
                genomes_list = [s[1] for s in samples_genomes]
                if len(set(genomes_list)) > 1:
                    raise Exception(
                        'More than one genome specified (%s). Please create a different metadata file'
                        ' per genome or provide a sjdb and specify the --separate-jsons argument' %
                        ', '.join(set(genomes_list)))
                ref_dataset = consts.ReferenceDataset(genomes_list[0], read_length=self.read_length)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir), wf_key, None
Exemple #3
0
    def parse_metadata(self, data_dir):
        """Group metadata rows by workflow key and yield rendered JSON configs.

        The workflow key joins the lower-cased read type and the strand
        specificity with ``-``, plus ``with-sjdb`` when
        ``self.skip_star_2pass`` is truthy. Each sample also carries its
        genome, ERCC spike-in flag and read length, which are used to build
        the reference dataset.

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if, without ``separate_jsons``, the samples of one
            workflow key disagree on genome, ERCC spike-in or read length.
        """
        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for _, r in self.records.iterrows():
            columns = r.keys()
            read_type = r['paired-end or single-end'].lower()
            sample_name = r['name']
            strand_specific = r['strand specificity']
            # Optional columns fall back to defaults when absent.
            genome = r['genome'] if 'genome' in columns else consts.GENOME
            ercc_spikein = r['with ercc spike-in'] if 'with ercc spike-in' in columns else False
            read_length = int(r['read length']) if 'read length' in columns else self.read_length

            kws = [read_type, strand_specific]
            if self.skip_star_2pass:
                kws.append('with-sjdb')
            wf_key = '-'.join(kws)
            # Overwritten per row, so 'sn' ends up as the last sample name
            # seen for this key.
            wf_conf_dict[wf_key] = {'rt': read_type, 'sn': sample_name}
            samples_dict[wf_key].append([sample_name, genome, ercc_spikein, read_length])

        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            if self.obj.separate_jsons:
                for si, s in enumerate(sorted(samples_genomes)):
                    sample, genome, ercc_spikein, read_length = s
                    ref_dataset = consts.ReferenceDataset(genome,
                                                          read_length=read_length,
                                                          with_ercc=ercc_spikein)
                    self.update_paths(ref_dataset)

                    yield self.render_json(wf_conf_dict[wf_key], [sample], data_dir), wf_key, si
            else:
                # Every entry in samples_dict holds at least one sample, so
                # the transposition always yields four columns.
                samples_list, genomes_list, ercc_list, read_length_list = zip(*samples_genomes)
                if len(set(genomes_list)) > 1:
                    raise Exception(
                        'More than one genome specified (%s). Please create a different metadata file'
                        ' per genome or provide a sjdb and specify the --separate-jsons argument' %
                        ', '.join(set(genomes_list)))
                if len(set(ercc_list)) > 1:
                    raise Exception(
                        'With and without ERCC spike-in specified. Please create a different metadata file'
                        ' per ERCC choice or provide a sjdb and specify the --separate-jsons argument')
                if len(set(read_length_list)) > 1:
                    raise Exception(
                        'More than one read length specified. Please create a different metadata file'
                        ' per read length choice or provide a sjdb and specify the --separate-jsons argument')
                ref_dataset = consts.ReferenceDataset(genomes_list[0],
                                                      read_length=read_length_list[0],
                                                      with_ercc=ercc_list[0])
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key], sorted(samples_list), data_dir), wf_key, None
Exemple #4
0
    def parse_metadata(self, data_dir):
        """Group metadata rows by workflow key and yield rendered JSON configs.

        The workflow key is the lower-cased read type, with ``-umis``
        appended when the row has a ``umis`` column whose value is not
        rejected by ``is_false``.

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if samples sharing a workflow key name more than
            one genome while ``separate_jsons`` is not set.
        """
        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for _, r in self.records.iterrows():
            read_type = r['paired-end or single-end'].lower()
            sample_name = r['name']
            # Fall back to the default genome when the column is absent.
            genome = r['genome'] if 'genome' in r.keys() else consts.GENOME
            with_umis = 'umis' in r.keys() and not is_false(r['umis'])
            wf_key = read_type + '-umis' if with_umis else read_type

            # Overwritten per row, so 'sn' ends up as the last sample name
            # seen for this key.
            wf_conf_dict[wf_key] = {
                'rt': read_type,
                'sn': sample_name,
                'umis': with_umis
            }
            samples_dict[wf_key].append([sample_name, genome])

        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            # The UMI setting is recovered from the key itself.
            uses_umis = 'umis' in wf_key
            if self.obj.separate_jsons:
                for si, (sample, genome) in enumerate(sorted(samples_genomes)):
                    ref_dataset = consts.ReferenceDataset(
                        genome,
                        read_length=self.read_length,
                        umis=uses_umis)
                    self.update_paths(ref_dataset)

                    yield self.render_json(wf_conf_dict[wf_key], [sample],
                                           data_dir), wf_key, si
            else:
                samples_list = [s[0] for s in samples_genomes]
                genomes_list = [s[1] for s in samples_genomes]
                if len(set(genomes_list)) > 1:
                    raise Exception(
                        'More than one genome specified (%s). Please create a different metadata file'
                        ' per genome or provide a sjdb and specify the --separate-jsons argument'
                        % ', '.join(set(genomes_list)))
                ref_dataset = consts.ReferenceDataset(
                    genomes_list[0],
                    read_length=self.read_length,
                    umis=uses_umis)
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key],
                                       sorted(samples_list),
                                       data_dir), wf_key, None
Exemple #5
0
    def parse_metadata(self, data_dir):
        """Group metadata records by workflow key and yield rendered JSON configs.

        The workflow key joins the lower-cased read type and peak type with
        ``-``, with ``-with-control`` appended when the record has a
        non-empty ``Control`` value other than ``NA`` (case-insensitive).

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if samples sharing a workflow key name more than
            one genome while ``separate_jsons`` is not set.
        """
        def sample_sort_key(info):
            # Dicts cannot be ordered on Python 3, so compare their sorted
            # (key, value) pairs instead.
            return sorted(info.items())

        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for r in self.records:
            read_type = r['Paired-end or single-end'].lower()
            peak_type = r['Peak type'].lower()
            sample_info = {'treatment': r['Name'], 'iter': r['Iter num']}
            wf_key = '-'.join([read_type, peak_type])
            has_control = ('Control' in r.keys() and r['Control']
                           and r['Control'].upper() != 'NA')
            if has_control:
                sample_info['control'] = r['Control']
                wf_key += '-with-control'
            # Overwritten on every record: all records with the same key have
            # the same read type, peak type and set of sample fields.
            wf_conf_dict[wf_key] = {
                'rt': read_type,
                'pt': peak_type,
                'st': list(sample_info)
            }
            genome = 'hg38'  # Default genome
            if 'Genome' in r.keys():
                genome = r['Genome']

            samples_dict[wf_key].append([sample_info, genome])

        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            if self.obj.separate_jsons:
                ordered = sorted(samples_genomes,
                                 key=lambda s: (sample_sort_key(s[0]), s[1]))
                for si, (sample, genome) in enumerate(ordered):
                    ref_dataset = consts.ReferenceDataset(genome)
                    self.update_paths(ref_dataset)
                    yield self.render_json(wf_conf_dict[wf_key], [sample],
                                           data_dir,
                                           self.experiment_type), wf_key, si

            else:
                samples_list, genomes_list = zip(*samples_genomes)
                if len(set(genomes_list)) > 1:
                    raise Exception(
                        'More than one genome specified (%s). Please create a different metadata file'
                        ' per genome or provide a sjdb and specify the --separate-jsons argument'
                        % ', '.join(set(genomes_list)))
                ref_dataset = consts.ReferenceDataset(genomes_list[0])
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key],
                                       sorted(samples_list, key=sample_sort_key),
                                       data_dir,
                                       self.experiment_type), wf_key, None
Exemple #6
0
    def parse_metadata(self, data_dir):
        """Group metadata records by workflow key and yield rendered JSON configs.

        The workflow key is the lower-cased read type, with
        ``-blacklist-removal`` appended unless the record has a
        ``Blacklist removal`` value for which ``is_false`` returns true.
        Workflows without that suffix get their reference dataset's
        ``encode_blacklist_bedfile`` cleared before paths are updated.

        :param data_dir: forwarded unchanged to ``self.render_json``.
        :return: generator of ``(rendered, wf_key, index)`` tuples. When
            ``self.obj.separate_jsons`` is set, one tuple is produced per
            sample and ``index`` is its position in sorted order; otherwise
            one tuple is produced per workflow key and ``index`` is ``None``.
        :raises Exception: if samples sharing a workflow key name more than
            one genome while ``separate_jsons`` is not set.
        """
        def sample_sort_key(info):
            # Dicts cannot be ordered on Python 3, so compare their sorted
            # (key, value) pairs instead.
            return sorted(info.items())

        samples_dict = defaultdict(list)
        wf_conf_dict = {}
        for r in self.records:
            read_type = r['Paired-end or single-end'].lower()
            sample_info = {'treatment': r['Name'], 'iter': r['Iter num']}
            wf_key = read_type
            genome = 'hg38'  # Default genome
            if 'Genome' in r.keys():
                genome = r['Genome']
            # Blacklist removal is the default; only an explicit false-like
            # value in the column switches it off.
            if not ('Blacklist removal' in r.keys()
                    and is_false(r['Blacklist removal'])):
                wf_key += '-blacklist-removal'

            wf_conf_dict[wf_key] = {'rt': read_type}
            samples_dict[wf_key].append([sample_info, genome])

        # .items() exists on both Python 2 and 3; .iteritems() does not.
        for wf_key, samples_genomes in samples_dict.items():
            keep_blacklist = 'blacklist-removal' in wf_key
            if self.obj.separate_jsons:
                ordered = sorted(samples_genomes,
                                 key=lambda s: (sample_sort_key(s[0]), s[1]))
                for si, (sample, genome) in enumerate(ordered):
                    ref_dataset = consts.ReferenceDataset(genome)
                    if not keep_blacklist:
                        ref_dataset.encode_blacklist_bedfile = None
                    self.update_paths(ref_dataset)
                    yield self.render_json(wf_conf_dict[wf_key], [sample],
                                           data_dir,
                                           self.experiment_type), wf_key, si
            else:
                samples_list = [s[0] for s in samples_genomes]
                genomes_list = [s[1] for s in samples_genomes]
                if len(set(genomes_list)) > 1:
                    raise Exception(
                        'More than one genome specified (%s). Please create a different metadata file'
                        ' per genome or provide a sjdb and specify the --separate-jsons argument'
                        % ', '.join(set(genomes_list)))
                ref_dataset = consts.ReferenceDataset(genomes_list[0])
                if not keep_blacklist:
                    ref_dataset.encode_blacklist_bedfile = None
                self.update_paths(ref_dataset)
                yield self.render_json(wf_conf_dict[wf_key],
                                       sorted(samples_list, key=sample_sort_key),
                                       data_dir,
                                       self.experiment_type), wf_key, None