Python RunRecord.addCommandsの例、chippy.util.run_record.RunRecord.addCommands Pythonの例

コード例 #1

0

ファイルを表示

ファイル: test_run_record.py プロジェクト: cameron-jack/Chippy

    def test_add_commands(self):
        """Check that RunRecord.addCommands wraps long command lines in the log.

        An empty argument list is logged first, followed by a long list of
        words. Every line found in the log file (minus its first tab-separated
        field) is compared against the expected entries.
        """
        # Logging is switched on for the duration of the test only; the
        # try/finally guarantees it is disabled again even when an assertion
        # fails, so later tests are not affected.
        logging.disable(logging.NOTSET)
        try:
            rr = RunRecord('test_add_commands')
            rr.addCommands([])
            cmd_line = ('This is a list of command arguments that probably '
                        'do not exist in the real world')
            rr.addCommands(cmd_line.split(' '))

            recorded_lines = [
                'ChipPy.test_add_commands\tINFO\tcommand-line\tNo arguments given',
                'ChipPy.test_add_commands\tINFO\tcommand-line\tThis is a list of command arguments',
                'ChipPy.test_add_commands\tINFO\tcommand-line\tthat probably do not exist in the real',
                'ChipPy.test_add_commands\tINFO\tcommand-line\tworld'
            ]

            # The context manager closes the log file even if a comparison
            # fails part-way through.
            with open(LOG_FN, 'r') as log_file:
                for n, line in enumerate(log_file):
                    line_parts = [lp.strip() for lp in line.split('\t')]
                    # The first field is skipped before comparing.
                    assert '\t'.join(line_parts[1:]) == recorded_lines[n]
        finally:
            logging.disable(logging.CRITICAL)

コード例 #2

0

ファイルを表示

def main():
    """Plot how counts distributions vary with rank.

    Builds one CountsStudy per collection given on the command line,
    optionally normalises each by RPM, converts every study to a ranked
    score array and passes the result to make_plot. The run record is
    displayed at the end.
    """
    rr = RunRecord('counts_distribution')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(window_title='Counts Distribution')

    studies = [CountsStudy(fn) for fn in args.collections]

    figure_options = dict(x_size=args.fig_width,
                          y_size=args.fig_height,
                          title=args.title,
                          x_text=args.xlabel,
                          y_text=args.ylabel)
    fig_details = FigureDetails(**figure_options)

    if args.normalise_by_RPM:
        for study in studies:
            study.normaliseByRPM()

    # One ranked score array per study, in the order the studies were given.
    score_groups = [
        study.scoresAsRankedArray(metric=args.counts_region,
                                  log2=args.y_axis_is_log)
        for study in studies
    ]

    make_plot(score_groups, fig_details, args.plot_type, args.plot_filename)

    rr.display()

コード例 #3

0

ファイルを表示

def main():
    """Load an expression data file into the DB as a new sample.

    The sample name and the reference file are checked against existing DB
    entries first. The data file is then read into a table by the loader
    matching the requested sample type ('abs_expr', 'diff_expr' or
    'target_genes') and handed to add_data. The outcome is recorded in the
    run record, which is displayed at the end.
    """
    rr = RunRecord('add_expression_db')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse(window_title='Add Expression to DB')
    session = db_query.make_session(args.db_path)

    name, description = args.name, args.description
    ref_file = args.expression_data
    sample_type = args.sample_type

    # Sample and reference file must both be new to the DB.
    if name in db_query.get_sample_entries(session):
        rr.dieOnCritical('Sample name already exists', name)
    existing_reffiles = db_query.get_reffile_entries(session,
                                                     reffile_name=ref_file)
    if ref_file in existing_reffiles:
        rr.dieOnCritical('ReferenceFile already loaded', ref_file)

    requested_type = sample_types[sample_type]
    if requested_type == sample_types['abs_expr']:
        expr_table = gene_expr_to_table(
            ref_file,
            stable_id_label=args.gene_id_heading,
            probeset_label=args.probeset_heading,
            exp_label=args.expression_heading,
            allow_probeset_many_gene=args.allow_probeset_many_gene,
            validate=True,
            sep=args.sep)
    elif requested_type == sample_types['diff_expr']:
        # Validation is switched off here: it breaks on some diff files
        # that list every probeset but give the mean score only once.
        expr_table = gene_expr_diff_to_table(
            ref_file,
            stable_id_label=args.gene_id_heading,
            probeset_label=args.probeset_heading,
            exp_label=args.expression_heading,
            sig_label=args.significance_heading,
            pval_label=args.p_value_heading,
            allow_probeset_many_gene=args.allow_probeset_many_gene,
            validate=False,
            sep=args.sep)
    elif requested_type == sample_types['target_genes']:
        expr_table = LoadTable(ref_file, sep=args.sep)
    else:
        rr.dieOnCritical('Unknown sample type', sample_type)

    success = add_data(session,
                       name,
                       description,
                       ref_file,
                       expr_table,
                       sample_type=sample_type,
                       reffile1=args.reffile1,
                       reffile2=args.reffile2)

    rr.addInfo(name + ' added to DB', success)
    rr.display()

コード例 #4

0

ファイルを表示

ファイル: diff_abs_plots.py プロジェクト: cameron-jack/Chippy

def main():
    """Plot the score differential against absolute expression, twice.

    The differential sample provides the y-axis of both plots; each of the
    two absolute expression samples provides the x-axis of one plot. Both
    plots share the same colour/visibility settings and most labels.
    """
    rr = RunRecord('diff_abs_plots')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(
        window_title='Difference vs Absolute Expression Plots')

    # Colour and visibility settings shared by both plots.
    groups_dict = {
        'extremes_colour': args.extremes_colour,
        'signif_colour': args.signif_colour,
        'bulk_colour': args.bulk_colour,
        'hide_extremes': args.hide_extremes,
        'hide_signif': args.hide_signif,
        'hide_bulk': args.hide_bulk,
    }

    # Should do number restrictions in load step
    # Load all genes into RawPlotData object
    print('Loading data for first plot')
    raw_plot_data1 = load_sample_genes(
        args.db_path, args.diff_sample,
        args.abs_expr_sample1, args.sample_extremes)

    print('Loading data for second plot')
    raw_plot_data2 = load_sample_genes(
        args.db_path, args.diff_sample,
        args.abs_expr_sample2, args.sample_extremes)

    print('Building plot points')
    # get back a list of plot_dot objects with 'x', 'y', 'colour', 'area'
    plot_dots1 = build_plot_points(raw_plot_data1, args.ranks, args.num_genes)
    plot_dots2 = build_plot_points(raw_plot_data2, args.ranks, args.num_genes)

    plot_dict = {
        'out_name': args.plot1_name,
        'title': args.title,
        'y_text': args.ylabel,
        'y_units': args.yaxis_units,
        'x_text': args.xaxis_text1,
        'x_units': args.xaxis_units,
        'diff_name': raw_plot_data1.diff_name,
        'sample_name': raw_plot_data1.sample_name,
    }
    print('Generating plot 1')
    make_plot(plot_dots1, plot_dict, groups_dict)

    # Only the sample name, output name and x-axis text differ for plot 2.
    plot_dict.update(sample_name=raw_plot_data2.sample_name,
                     out_name=args.plot2_name,
                     x_text=args.xaxis_text2)

    print('Generating plot 2')
    make_plot(plot_dots2, plot_dict, groups_dict)

    rr.display()

コード例 #5

0

ファイルを表示

def main():
    """Drop the records of one sample from the DB and log the outcome.

    A truthy result from db_query.drop_sample_records is logged as an info
    entry ('Success'); anything else is logged as a warning ('Failure').
    """
    rr = RunRecord('drop_expression_db')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse(window_title='Drop Expression Data')
    session = db_query.make_session(args.db_path)

    dropped = db_query.drop_sample_records(session, args.sample)
    action = 'Removing ' + args.sample
    if dropped:
        rr.addInfo(action, 'Success')
    else:
        rr.addWarning(action, 'Failure')

    rr.display()

コード例 #6

0

ファイルを表示

def main():
    """Log summary figures for a whole DB, or for a single sample in it.

    Without a sample name, DB-wide totals (samples, genes, exons,
    expression, differential and target gene entries, reference files) are
    reported. With a sample name, only the expression, differential and
    target gene counts for that sample and its reference file entries are
    reported.
    """
    rr = RunRecord('db_summary')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(window_title='DB Summary')
    session = make_session(args.db_path)
    sample_name = args.sample or None
    whole_db = sample_name is None

    chroms = get_chroms(session)
    species = get_species(session)

    # All queries run before anything is logged.
    if whole_db:
        total_samples_count = get_sample_counts(session)
        sample_names = get_all_sample_names(session)
        total_genes_count = get_gene_counts(session)
        total_exon_count = get_exon_counts(session)
        total_expr_count = get_expression_counts(session)
        total_diff_genes_count = get_diff_counts(session)
        total_target_genes_count = get_targetgene_counts(session)
        total_reffiles_count = get_reffile_counts(session)
    else:
        total_expr_count = get_expression_counts(session, sample_name)
        total_diff_genes_count = get_diff_counts(session, sample_name)
        total_target_genes_count = get_targetgene_counts(session, sample_name)
        reffiles_entries = get_reffile_entries(session,
                                               sample_name=sample_name)

    # Info entries in the order they are logged.
    summary = [
        ('ChipPy DB name', args.db_path),
        ('Species name', species),
        ('Chroms list', chroms),
    ]
    if whole_db:
        summary.extend([
            ('Total # of sample entries', total_samples_count),
            ('Sample names', sample_names),
            ('Total # of gene entries', total_genes_count),
            ('Total # of exon entries', total_exon_count),
        ])
    summary.extend([
        ('Total # of absolute-scored gene entries', total_expr_count),
        ('Total # of differential gene entries', total_diff_genes_count),
        ('Total # of target gene entries', total_target_genes_count),
    ])
    for label, value in summary:
        rr.addInfo(label, value)

    if whole_db:
        rr.addInfo('Total # of reference files', total_reffiles_count)
    elif len(reffiles_entries) > 0:
        rr.addInfo('Reference file name', reffiles_entries)
    else:
        rr.addError('Reference file name', 'Not Available')

    rr.display()

コード例 #7

0

ファイルを表示

def main():
    """Create a new Chippy DB file populated with Ensembl gene data.

    The DB file name is built from the save prefix, the Ensembl release and
    the species. If no file of that name exists in the save directory, a
    session is opened on it, Ensembl gene data and a dummy expression entry
    are added, and the real path of the DB is printed. An existing file is
    reported as an error instead. The run record is displayed only when
    args.show_log is set.
    """
    rr = RunRecord('start_chippy_db')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse()
    create_path(args.save_db_dir)

    # Bail out quietly (no run record display) if the directory is missing.
    if not os.path.isdir(args.save_db_dir):
        sys.stderr.write('The save_db_dir must be an existing directory.\n')
        return

    release = args.ensembl_release
    species = args.species
    chippy_db_name = ''.join([args.save_db_prefix, '_chippy_', str(release),
                              '_', species, '.db'])
    db_path = os.path.join(args.save_db_dir, chippy_db_name)

    if os.path.exists(db_path):
        rr.addError('Chippy DB with this name already exists', db_path)
    else:
        session = make_session(db_path)

        account = HostAccount(args.hostname, args.username, args.password,
                              port=args.port)
        add_ensembl_gene_data(session,
                              species,
                              ensembl_release=release,
                              account=account)

        if create_dummy_expr(session):
            rr.addInfo('Dummy data added successfully', 'Expr=1.')
        else:
            rr.addError('Dummy data failed to upload to DB',
                        'Expect bigger problems')

        rr.addInfo('Chippy DB written', db_path)
        print(os.path.realpath(db_path))

    if args.show_log:
        rr.display()

コード例 #8

0

ファイルを表示

ファイル: expr_distribution.py プロジェクト: cameron-jack/Chippy

def main():
    """Plot how expression distributions vary with rank.

    Builds one ExprStudy per absolute expression sample, converts each to a
    ranked score array and passes the result to make_plot. The run record
    is displayed at the end.
    """
    rr = RunRecord('expr_distribution')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(window_title='Expression Distribution')
    db_path = args.db_path

    studies = [ExprStudy(samp, db_path) for samp in args.abs_expr_samples]

    figure_options = dict(x_size=args.fig_width,
                          y_size=args.fig_height,
                          title=args.title,
                          x_text=args.xlabel,
                          y_text=args.ylabel)
    fig_details = FigureDetails(**figure_options)

    # One ranked score array per study, in sample order.
    score_groups = [study.scoresAsRankedArray(log2=args.y_axis_is_log)
                    for study in studies]

    make_plot(score_groups, fig_details, args.plot_type, args.plot_filename)

    rr.display()

コード例 #9

0

ファイルを表示

ファイル: counts_vs_expr.py プロジェクト: cameron-jack/Chippy

def main():
    """Plot counts against expression (or ranks of either) for one sample.

    Exactly one collection is accepted. Expression goes on the x-axis when
    args.x_axis_type is 'expression' (case-insensitive); otherwise counts
    go on the x-axis. Axis labels gain ' Ranks' and ' (log base 2)'
    suffixes according to the corresponding options.
    """

    rr = RunRecord('counts_vs_expr')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(use_scrollbars=True,
                                     use_save_load_button=True,
                                     window_title='Counts vs Expression Plots')

    n_collections = len(args.collections)
    if n_collections > 1:
        rr.dieOnCritical('Only 1 collection allowed. You chose',
                         n_collections)

    # Load all required data
    print('Loading expression and counts data')
    sample = args.abs_expr_sample
    collection = args.collections[0]
    matched_studies = MatchedStudy(sample,
                                   collection,
                                   args.db_path,
                                   args.region_feature,
                                   include_target=args.include_targets,
                                   exclude_target=args.exclude_targets)

    print('Creating plot points')
    plot_points = matched_studies.get_matched_genes_as_xy_plotpoints(
        args.x_axis_type, args.expr_is_ranks, args.counts_is_ranks)

    fig = FigureDetails(x_size=args.fig_width,
                        y_size=args.fig_height,
                        title=sample + ' vs ' + collection)

    # Build each quantity's label once, then assign it to the right axis.
    expr_label = 'Expression'
    if args.expr_is_ranks:
        expr_label += ' Ranks'
    counts_label = 'Counts'
    if args.counts_is_ranks:
        counts_label += ' Ranks'

    if args.x_axis_type.lower() == 'expression':
        fig.x_text = expr_label
        fig.y_text = counts_label
    else:
        fig.y_text = expr_label
        fig.x_text = counts_label

    log_suffix = ' (log base 2)'
    if args.x_axis_is_log:
        fig.x_text += log_suffix
    if args.y_axis_is_log:
        fig.y_text += log_suffix

    make_plot(plot_points,
              plot_fn=args.plot_filename,
              fig_details=fig,
              x_axis_is_log=args.x_axis_is_log,
              y_axis_is_log=args.y_axis_is_log,
              x_axis_type=args.x_axis_type,
              counts_is_ranks=args.counts_is_ranks,
              expr_is_ranks=args.expr_is_ranks)

    rr.display()

コード例 #10

0

ファイルを表示

ファイル: export_counts.py プロジェクト: cameron-jack/Chippy

def main():
    """
        Returns a pickle of size window_start to window_finish containing
        chromatin mapping averages per base, one per gene, ranked by
        expression.
    """
    rr = RunRecord('export_counts')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse(window_title='Export Counts')

    session = db_query.make_session(args.db_path)

    sample_name = args.expr_sample
    print 'Loading counts data for', sample_name

    include_name = None
    exclude_name = None
    if args.include_targets:
        include_name = args.include_targets
        rr.addInfo('include gene targets', include_name)

    if args.exclude_targets:
        exclude_name = args.exclude_targets
        rr.addInfo('exclude gene targets', exclude_name)

    if (args.multitest_signif_val is not None) and not \
            (-1 <= args.multitest_signif_val <= 1):
        rr.dieOnCritical('Multitest_signif_val should be -1, 0, 1',
                         args.multitest_signif_val)

    if args.chr_prefix != '':
        # If it writes nothing then cogent.Table fails because it's fragile
        rr.addInfo('BAM/BED chromosome prefix given', args.chr_prefix)

    window_upstream = args.window_upstream
    assert window_upstream > 0, \
            'upstream window must be of at least size 1 bp'
    window_downstream = args.window_downstream
    assert window_downstream > 0, \
            'downstream window must be of at least size 1 bp'

    get_collection(session,
                   sample_name,
                   args.feature_type,
                   args.BAMorBED,
                   args.chr_prefix,
                   window_upstream,
                   window_downstream,
                   args.multitest_signif_val,
                   args.collection,
                   args.overwrite,
                   args.tab_delimited,
                   include_name,
                   exclude_name,
                   bedgraph=args.make_bedgraph,
                   BED_windows=args.BED_windows,
                   chrom_size=args.max_chrom_size,
                   no_overlap=args.no_overlap)

    session.close()
    rr.display()

コード例 #11

0

ファイルを表示

ファイル: gui.py プロジェクト: cameron-jack/Chippy

    def makeCommands(self):
        """Return the argument names and user inputs as one flat list.

        Named arguments (names starting with '-') come first, each followed
        by its value(s); values of unnamed (positional) arguments follow.
        Sources, in order: every filled row of the required layout, every
        ticked row of the optional layout, then the defaults of arguments
        that are not displayed. Both lists are also logged via RunRecord.
        """
        non_positional_parts = []
        positional_parts = []

        def collect(name, value):
            # Named options contribute their name plus value(s); positional
            # arguments have no name and contribute only their value(s).
            if name.startswith('-'):
                target = non_positional_parts
                target.append(name)
            else:
                target = positional_parts
            if value is None:
                return
            if type(value) == list:
                target.extend(value)
            else:
                target.append(value)

        required = self.requiredLayout
        for row in range(required.rowCount()):
            value_item = required.itemAtPosition(row, 2)
            if value_item is None:
                continue
            name, value = self._checkNameValue(
                required.itemAtPosition(row, 1), value_item)
            collect(name, value)

        optional = self.optionalLayout
        for row in range(optional.rowCount()):
            include_item = optional.itemAtPosition(row, 0)
            if include_item is None:
                continue
            # Only rows whose include widget reports a checked state count.
            if not include_item.widget().checkState():
                continue
            name, value = self._checkNameValue(
                optional.itemAtPosition(row, 1),
                optional.itemAtPosition(row, 2))
            collect(name, value)

        # deal with non-displayed args
        for arg in self.argobs:
            if arg.display or arg.default is None:
                continue
            collect(arg.long_form, str(arg.default))

        # Wrap named-argument parts that contain spaces in single quotes.
        non_positional_parts[:] = [
            "'" + part + "'" if type(part) == str and ' ' in part else part
            for part in non_positional_parts
        ]

        rr = RunRecord()
        rr.addCommands(non_positional_parts)
        rr.addCommands(positional_parts)

        return non_positional_parts + positional_parts

コード例 #12

0

ファイルを表示

def main(ui=None):
    """Convert a WIG file of expression signal into per-gene scores.

    1) Get all gene entries from the DB.
    2) Read the WIG file one chromosome at a time into a numpy array and
       pass that array to get_gene_scores_from_chrom each time the
       chromosome changes, and once more at the end of the file.
    3) Write out gene ids and scores as a tab-delimited '.exp' file.

    ui -- optional progress reporter; when given, ui.display(msg=...,
          progress=...) is called every 100 lines. May be None.
    """
    rr = RunRecord('expr_wig_to_exp')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse(window_title='Expression WIG to EXP')
    chrom_size = args.max_chrom_size
    prefix = args.chr_prefix

    session = db_query.make_session(args.db_path)
    genes = db_query.get_gene_entries(session)

    all_genes = {} # genes indexed by ensembl_id
    genes_by_chrom = {} # chrom: list(gene_id)
    genes_scores = {} # each gene has an expression score
    for gene in genes:
        genes_by_chrom.setdefault(gene.chrom, []).append(gene.ensembl_id)
        genes_scores[gene.ensembl_id] = 0
        all_genes[gene.ensembl_id] = gene

    wig_fn = args.wig
    is_gzipped = wig_fn.endswith('.gz')
    if is_gzipped:
        wig_file = gzip.GzipFile(wig_fn, 'rb')
    else:
        try:
            wig_file = open(wig_fn, 'r')
        except IOError:
            rr.dieOnCritical('Could not open file', wig_fn)

    # get total lines in wig for pacing the progress bar. Lines are not
    # counted for gzipped input, so fall back to the same placeholder that
    # is used when wc fails; previously total_lines was undefined here.
    total_lines = 1
    if not is_gzipped:
        # NOTE(review): the file name is concatenated into a command string;
        # names containing spaces or shell metacharacters may misbehave,
        # depending on how run_command executes it.
        command = 'wc -l ' + wig_fn
        returncode, stdout, stderr = run_command(command)
        if returncode:
            rr.addWarning('could not run wc to count WIG lines', 'error')
        else:
            total_lines = int(stdout.strip().split(' ')[0])
            rr.addInfo('total lines in '+wig_fn, total_lines)

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom

    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)

    # NOTE(review): the declaration fields below are parsed with
    # str.strip(chars), which removes a *set of characters* from both ends
    # rather than a prefix. With an empty chr_prefix this also removes a
    # leading 'chr' from 'chrom=chr10'. Left unchanged because existing
    # usage may rely on it - confirm before tightening.
    current_chrom = None
    try:
        for i, line in enumerate(wig_file):
            # ui is optional, so only report progress when one was supplied.
            if ui is not None and i % 100 == 0:
                msg = 'Reading wiggle entries [' + str(i) +\
                      ' / ' + str(total_lines) + ']'
                progress = (float(i)/float(total_lines))
                ui.display(msg=msg, progress=progress)

            if line.startswith('track'):
                continue
            elif line.startswith('fixed'):
                # fixedStep chrom=chr10 start=56001 step=20 span=20
                step_type = 'fixed'
                step_parts = line.split(' ')
                step = [val.strip('step=').strip() \
                        for val in step_parts if val.startswith('step')][0]
                span = [val.strip('span=').strip() \
                        for val in step_parts if val.startswith('span')][0]
                chrom = [val.strip('chrom='+prefix).strip() \
                         for val in step_parts if val.startswith('chrom')][0]

                if chrom == 'M':
                    chrom = 'MT'

                if current_chrom is None:
                    current_chrom = chrom
                elif current_chrom != chrom: # Empty chrom_array into genes
                    # The array still holds the data of the chromosome just
                    # finished, so it is scored under that name, not under
                    # the newly declared one.
                    get_gene_scores_from_chrom(chrom_array, current_chrom,
                            all_genes, genes_by_chrom, genes_scores)
                    current_chrom = chrom
                    chrom_array[:] = 0

                start = [val.strip('start=').strip() \
                         for val in step_parts if val.startswith('start')][0]
                pos = int(start)
                step = int(step)
                span = int(span)
            elif line.startswith('variable'):
                step_type = 'variable'
                step_parts = line.split(' ')
                chrom = [val.strip('chrom='+prefix).strip() \
                        for val in step_parts if val.startswith('chrom')][0]

                if chrom == 'M':
                    chrom = 'MT'

                if current_chrom is None:
                    current_chrom = chrom
                elif current_chrom != chrom: # Empty chrom_array into genes
                    # Same as for fixedStep: flush under the old name.
                    get_gene_scores_from_chrom(chrom_array, current_chrom,
                            all_genes, genes_by_chrom, genes_scores)
                    current_chrom = chrom
                    chrom_array[:] = 0
            else:
                if step_type == 'fixed':
                    chrom_array[pos] = float(line.strip())
                    pos += step
                else: #step_type == 'variable'
                    if '\t' in line:
                        line_parts = line.split('\t')
                    else:
                        line_parts = line.split(' ')
                    chrom_array[int(line_parts[0])] = \
                            float(line_parts[1].strip())
    finally:
        # Release the input file handle whether or not parsing succeeded.
        wig_file.close()

    # empty chrom_array into genes_score from the final section; skipped
    # when the file contained no declaration line at all.
    if current_chrom is not None:
        get_gene_scores_from_chrom(chrom_array, current_chrom, all_genes,
                genes_by_chrom, genes_scores)

    # output genes and scores
    if args.exp:
        out_fn = args.exp
    else:
        if '.gz' in wig_fn:
            wig_fn = '.'.join(wig_fn.split('.')[:-1])
        out_fn = '.'.join(wig_fn.split('.')[:-1]) # cut off wig extension
        out_fn += '.exp' # add .exp extension

    # The with-block closes the output file; no explicit close() is needed.
    with open(out_fn, 'w') as out:
        out.write('gene\texp\n') # header
        for gene_id in genes_scores:
            out.write(gene_id + '\t' + str(genes_scores[gene_id]) + '\n')

コード例 #13

0

ファイルを表示

def main():
    """ 1) Set counts_func
        2) Load studies
        3) Load divisor study if provided
        4) Normalise studies if required
        5) Set genes_of_interest
        6) Filter studies by genes_of_interest and statistical cutoff
        7) Create plotlines from studies
        8) Smooth or bin plotlines as required
        9) Do plot division (if required)
        10) Set basic plotting info
        11) Set lines colors as needed
        12) Create Plot
        13) Save Plot
        14) Save genes in plot to file in rank order (optional)
    """
    # Local import: only the output file naming in step 14 needs it.
    import os

    rr = RunRecord('plot_counts')
    rr.addCommands(sys.argv)
    args = script_info['args'].parse(window_title='Plot Counts')

    # 1: Set feature counting metric
    counts_func = set_counts_function(args.counts_metric)

    # 2: Load studies
    print('Loading counts data')
    studies, window_upstream, window_downstream =\
            load_studies(args.collections, counts_func)

    # 3: Load divisor study if provided
    if args.div is not None:
        div_studies, div_window_upstream, div_window_downstream =\
                load_studies([args.div], counts_func)
        if div_window_upstream == window_upstream and \
                div_window_downstream == window_downstream:
            print('Windows match - using div study')
            studies.append(div_studies[0])
            # alter name so we divide by the same study
            div_studies[0].collection_label += '_div'
            div_name = div_studies[0].collection_label
        else:
            rr.dieOnCritical('Differing Data and Div up/down-stream '+\
                    'window sizes',
                    [div_window_upstream, div_window_downstream,
                     window_upstream, window_downstream])
    else:
        div_name = None

    # 4: RPM Normalise counts by default
    if not args.no_normalise and args.counts_metric == 'mean':
        print('Normalising by counts RPM')
        for study in studies:
            study.normaliseByRPM()

    # 5: Specify genes of interest to direct study
    for study in studies:
        study.filterByGenes(args.db_path, chrom=args.chrom,
                include_samples=args.include_targets,
                exclude_samples=args.exclude_targets)

    # 6: Filter all genes in studies by statistical cutoff
    if args.data_cutoff > 0.0:
        for study in studies:
            study.filterByCutoff(args.data_cutoff)

    # 7: Create plot lines for each study in studies
    # A group size that is not an integer is treated as 'All'.
    try:
        group_size = int(args.group_size)
    except ValueError:
        group_size = 'All'

    plot_lines = []
    for study in studies:
        lines = study.asPlotLines(group_size, args.group_location,
                p=args.line_cutoff)
        plot_lines.extend(lines)

    rr.addInfo('Total number of lines from all studies', len(plot_lines))

    # 8: smooth and/or bin plot lines as required
    if args.binning and args.binning > 0:
        for line in plot_lines:
            line.applyBinning(args.binning)
        rr.addInfo('lines binned to width', args.binning)

    if args.smoothing and args.smoothing > 0:
        for line in plot_lines:
            line.applySmoothing(args.smoothing)
        rr.addInfo('lines smoothed to width', args.smoothing)

    # 9: Do plot division if required
    if div_name:
        plot_lines = div_plots(plot_lines, div_name, div_by=args.div_by)

    rr.addInfo('Total number of lines to plot', len(plot_lines))

    # 10: set basic plotting info
    ylim = None
    if args.ylim is not None:
        if ',' not in args.ylim:
            # Report the offending argument, not the still-unset local.
            rr.dieOnCritical('ylim must be comma separated', args.ylim)
        ylim = [float(val) for val in args.ylim.strip().split(',')]

    # if we have a plot series, create a directory to write plots
    if args.plot_series and not args.test_run:
        set_up_series_plots_dir(args.plot_filename)
        filename_series = []
    else:
        filename_series = None

    print('Prepping for plot')

    vline = dict(x=0, linewidth=args.vline_width,
            linestyle=args.vline_style, color='w')

    plot = PlottableGroups(height=args.fig_height/2.5,
            width=args.fig_width/2.5, bgcolor=args.bgcolor,
            grid_off=args.grid_off,
            yaxis_lims=ylim, xaxis_lims=(-window_upstream, window_downstream),
            xy_tick_spaces=(args.xgrid_lines, args.ygrid_lines),
            xy_tick_intervals=(args.xtick_interval, args.ytick_interval),
            offset_ticks=args.offset_ticks, linewidth=args.line_width,
            title_size=args.title_size, font=args.font,
            xy_label_fontsizes=(args.xfont_size, args.yfont_size),
            vline=vline, ioff=True, colorbar=args.colorbar,
            clean=args.clean_plot)

    x = numpy.arange(-window_upstream, window_downstream)

    # 11: set line colors
    plot_lines = set_plot_colors(plot_lines, studies,
            div_name, args.bgcolor, args.grey_scale,
            restrict_colors=args.restrict_colors)

    # 12: Create plot
    plot(x, plot_lines=plot_lines, filename_series=filename_series,
            xlabel=args.xlabel, ylabel=args.ylabel,
            title=args.title, colorbar=args.colorbar,
            labels_size=args.legend_font_size, show_legend=args.legend,
            plot_CI=args.confidence_intervals)

    # 13: Save plots
    # if series, create directory
    # NOTE(review): this repeats the call already made in step 10; kept
    # as-is because the helper is not visible here - confirm whether the
    # second call is needed.
    if args.plot_series and not args.test_run:
        set_up_series_plots_dir(args.plot_filename)

    if args.plot_filename and not args.test_run:
        plot_fn = args.plot_filename
        lowered_fn = plot_fn.lower()
        if '.pdf' in lowered_fn:
            plot.savefig(plot_fn, image_format='pdf')
        elif '.png' in lowered_fn:
            plot.savefig(plot_fn, image_format='png')
        elif '.jpg' in lowered_fn or '.jpeg' in lowered_fn:
            plot.savefig(plot_fn, image_format='jpg')
        else:
            # No recognised image extension: default to PDF.
            plot.savefig(plot_fn+'.pdf', image_format='pdf')
    else:
        plot.show()

    # 14: Save ENSEMBL gene ids by rank if requested
    if args.write_genes_by_rank:
        # Sort once; the ordering is the same for every study's file.
        plot_lines.sort(key=lambda plot_line: plot_line.rank)

        # splitext keeps the base name when there is no extension; the
        # previous '.'-split produced an empty base in that case and the
        # requested file name was lost.
        fn_base, fn_ext = os.path.splitext(args.write_genes_by_rank)
        if not fn_ext:
            fn_ext = '.txt'

        for study in studies:
            fn = fn_base + '_' +\
                    study.collection_label.replace(' ', '_') + fn_ext
            with open(fn, 'w') as out:
                out.write('gene' + '\n')
                for line in plot_lines:
                    for label in line.getLabelsAsList():
                        out.write(label + '\n')

    rr.display()