Ejemplo n.º 1
0
 def start_chippy_db(self):
     """Run start_chippy_db.py and adopt the database path it prints.

     The script is launched without a DB argument (``include_db=False``).
     When it exits with status 0 and its stdout passes
     ``self.check_valid_db``, stdout is stored as ``self.current_db``, a
     session is opened on it and handed to ``self.populateDBInfo``, and
     ``self.switch_menu_actions(True)`` is called.  Nothing is changed when
     the script fails or its output is rejected.
     """
     command = self._make_cmd_str('start_chippy_db.py', include_db=False)
     returncode, stdout, stderr = run_command(command)
     if returncode != 0 or not self.check_valid_db(stdout):
         return

     self.current_db = stdout
     # Check the DB works correctly
     session = db_query.make_session(self.current_db)
     try:
         self.populateDBInfo(session)
     finally:
         # release the session even when populating the DB info fails
         session.close()
     self.switch_menu_actions(True)
Ejemplo n.º 2
0
 def expression_distribution(self):
     """Run expr_distribution.py, then call populateLogTable().

     The command line comes from ``self._make_cmd_str``; the script's exit
     status and output are not inspected here.
     """
     run_command(self._make_cmd_str('expr_distribution.py'))
     self.populateLogTable()
Ejemplo n.º 3
0
 def counts_distribution(self):
     """Run counts_distribution.py, then call populateLogTable().

     The command line comes from ``self._make_cmd_str``; the script's exit
     status and output are not inspected here.
     """
     run_command(self._make_cmd_str('counts_distribution.py'))
     self.populateLogTable()
Ejemplo n.º 4
0
 def counts_vs_expression(self):
     """Run counts_vs_expr.py, then call populateLogTable().

     The command line comes from ``self._make_cmd_str``; the script's exit
     status and output are not inspected here.
     """
     run_command(self._make_cmd_str('counts_vs_expr.py'))
     self.populateLogTable()
Ejemplo n.º 5
0
 def expression_diff_vs_abs(self):
     """Run diff_abs_plots.py, then call populateLogTable().

     The command line comes from ``self._make_cmd_str``; the script's exit
     status and output are not inspected here.
     """
     run_command(self._make_cmd_str('diff_abs_plots.py'))
     self.populateLogTable()
Ejemplo n.º 6
0
 def plot_counts_data(self):
     """Run plot_counts.py, then call populateLogTable().

     The command line comes from ``self._make_cmd_str``; the script's exit
     status and output are not inspected here.
     """
     run_command(self._make_cmd_str('plot_counts.py'))
     self.populateLogTable()
Ejemplo n.º 7
0
 def drop_expression_data(self):
     """Run drop_expression_db.py, then refresh via the two populate calls.

     After the script returns, ``populateDBTable()`` and then
     ``populateLogTable()`` are called; the script's exit status and output
     are not inspected here.
     """
     run_command(self._make_cmd_str('drop_expression_db.py'))
     self.populateDBTable()
     self.populateLogTable()
Ejemplo n.º 8
0
def read_vcf(vcf_path, ROIs, chr_prefix='', chrom_size=300000000, ui=None):
    """
        Count VCF records per position and hand the counts to each ROI.

        Either use cyvcf for ultra-fast VCF reading or do it ourselves.
        Every non-header record adds 1.0 at its position in a
        per-chromosome numpy array. Whenever the chromosome changes (and
        once more after the last record) the array is passed to
        get_roi_scores_from_chrom() for the chromosome just finished.

        Arguments:
            vcf_path: path of the VCF file; a '.gz' suffix selects gzip.
            ROIs: iterable of objects exposing .chrom and .unique_id.
            chr_prefix: literal prefix removed from the start of each
                chromosome name.
            chrom_size: length of the per-chromosome counts array.
            ui: optional object with display(msg=..., progress=...);
                progress reporting is skipped when it is None.

        Returns (ROIs, num_tags, num_bases, mapped_tags). num_tags and
        mapped_tags are the number of records counted; num_bases is that
        number multiplied by 75.
    """
    rr = RunRecord('read_vcf')
    total_score = 0  # used to calculate normalisation factors
    roi_ids_by_chrom = {}  # to save time in parsing positions
    all_rois = {}  # ROIs by uniqueID - converted back to list at end
    for roi in ROIs:
        roi_ids_by_chrom.setdefault(roi.chrom, []).append(roi.unique_id)
        all_rois[roi.unique_id] = roi

    if NO_CYVCF:
        # Parse the VCF ourselves
        is_gzipped = vcf_path.endswith('.gz')
        try:
            if is_gzipped:
                vcf_file = GzipFile(vcf_path, 'rb')
            else:
                vcf_file = open(vcf_path, 'r')
        except IOError:
            rr.dieOnCritical('Could not open file', vcf_path)

        # get total lines in VCF for pacing the progress bar
        if is_gzipped:
            command = 'gunzip -c ' + vcf_path + ' | wc -l'
        else:
            command = 'wc -l ' + vcf_path

        returncode, stdout, stderr = run_command(command)
        if returncode:
            rr.addWarning('could not run wc to count VCF lines', 'error')
            total_lines = 1
        else:
            total_lines = int(stdout.strip().split(' ')[0])
            rr.addInfo('total lines in ' + vcf_path, total_lines)

    else:
        # NOTE(review): the loop below treats every item as a tab-separated
        # text line and this branch relies on len() of the reader - confirm
        # that cyvcf.Reader supports both.
        vcf_file = cyvcf.Reader(open(vcf_path, 'rb'))
        total_lines = len(vcf_file)

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom
    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)
    array_len = len(chrom_array)

    # wc -l reports 0 for a single line without a trailing newline, so never
    # divide by less than 1
    progress_total = float(max(total_lines, 1))

    current_chrom = None
    for i, line in enumerate(vcf_file):
        if ui is not None and i % 100 == 0:
            msg = 'Reading VCF entries [' + str(i) +\
                   ' / ' + str(total_lines) + ']'
            ui.display(msg=msg, progress=float(i) / progress_total)

        if line.startswith('#'):
            continue  # header line

        line_parts = line.split('\t')
        chrom = line_parts[0].strip()
        # remove the literal prefix; str.lstrip() would strip any run of the
        # prefix's characters and can eat into the chromosome name itself
        if chr_prefix and chrom.startswith(chr_prefix):
            chrom = chrom[len(chr_prefix):]

        if current_chrom is None:
            current_chrom = chrom
        elif current_chrom != chrom:
            get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                      roi_ids_by_chrom)
            current_chrom = chrom
            chrom_array[:] = 0

        position = int(line_parts[1].strip())
        if position == 0:
            continue  # telomere

        # A slice update past the end of the array is silently ignored by
        # numpy, so the bounds are checked explicitly to keep the warning
        if position >= array_len:
            rr.addWarning('position larger than expect chrom size', position)
        else:
            chrom_array[position] += 1.0
        total_score += 1

    # Get last chromosome entries (nothing to flush if no record was read)
    if current_chrom is not None:
        get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                  roi_ids_by_chrom)

    ROIs = list(all_rois.values())
    num_tags = total_score
    num_bases = total_score * 75
    mapped_tags = total_score

    return ROIs, num_tags, num_bases, mapped_tags
Ejemplo n.º 9
0
def read_BAM(bam_path, ROIs, chr_prefix='', chrom_size=300000000, ui=None):
    """
        Get sequence reads for each chrom.
        Relies on Samtools view.

        Cogent entries are 0-offset with ends+1 so they slice perfectly into arrays
        SAM entries are 1-offset so subtract from _start to get proper slice.

        Output ROIs as well as total tags read and total bases added as
        these can be used by later stage for normalisation.

        Arguments:
            bam_path: path of the BAM file handed to samtools.
            ROIs: iterable of objects exposing .chrom and .unique_id.
            chr_prefix: prefix put in front of ROI chromosome names that do
                not already contain it, to build the samtools region name.
            chrom_size: length of the per-chromosome counts array.
            ui: optional object with display(msg=..., progress=...);
                progress reporting is skipped when it is None.

        Returns (ROIs, num_tags, num_bases, mapped_tags): the ROIs, the
        number of reads with an accepted flag, the sum of their matched
        ('M') CIGAR lengths, and the mapped-read total from
        'samtools idxstats'.
    """
    rr = RunRecord('read_BAM')

    roi_ids_by_chrom = {}  # to save time in parsing positions
    all_rois = {}  # ROIs by uniqueID - converted back to list at end
    for roi in ROIs:
        roi_ids_by_chrom.setdefault(roi.chrom, []).append(roi.unique_id)
        all_rois[roi.unique_id] = roi

    # Collect stats for counts normalisation
    num_tags = 0
    num_bases = 0
    mapped_tags = 0

    # get total number of mapped tags first
    command = 'samtools idxstats ' + bam_path
    returncode, stdout, stderr = run_command(command)
    if returncode != 0:
        rr.dieOnCritical('Samtools idxstats died', 'Indexed correctly?')
    else:
        for line in stdout.split('\n'):
            line_parts = line.split('\t')
            if len(line_parts) == 4:
                # third idxstats column holds the mapped read count
                mapped_tags += int(line_parts[2])

    # never divide by zero when idxstats reported no mapped reads
    progress_total = float(max(mapped_tags, 1))

    # SAM flags accepted: primary single-end alignments on either strand
    # (0, 16) and properly paired mates (83, 99, 147, 163)
    valid_flags = set([0, 16, 83, 99, 147, 163])
    bam_lines_seen = 0
    valid_flag_count = 0
    invalid_flag_count = 0

    # compiled once; yields the length of every 'M' operation in a CIGAR
    matched_lengths = re.compile(r"([\d]+)M").findall

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom
    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)

    for chrom_key in roi_ids_by_chrom.keys():
        if chr_prefix not in chrom_key:
            chrom = chr_prefix + chrom_key  # default is ''
        else:
            chrom = chrom_key
        chrom_fn = str(uuid.uuid4())
        # get all reads per chrom
        # NOTE(review): the command is built by plain concatenation, so a
        # path containing spaces or shell metacharacters will break it.
        command = 'samtools view ' + bam_path + ' ' + chrom + ' > ' + chrom_fn
        returncode, stdout, stderr = run_command(command)

        if 'fail to determine the sequence name' in stderr:
            reported = False
            for part in stderr.split(' '):
                if chr_prefix in part:
                    reported = True
                    rr.addWarning('Possibly incorrect chromosome prefix', part)
            if not reported:
                rr.addWarning('Possibly incorrect chromosome prefix', stderr)

        try:
            if returncode == 0:
                with open(chrom_fn, 'r') as bam_lines:
                    for entry in bam_lines:
                        if not entry.strip():
                            continue  # blank line
                        bam_lines_seen += 1

                        if ui is not None and bam_lines_seen % 100 == 0:
                            msg = 'Reading BAM entries [' +\
                                  str(bam_lines_seen) +\
                                  ' / ' + str(mapped_tags) + ']'
                            progress = float(bam_lines_seen) / progress_total
                            ui.display(msg=msg, progress=progress)

                        entry_parts = entry.split('\t')
                        entry_flag = int(entry_parts[1])
                        if entry_flag not in valid_flags:
                            invalid_flag_count += 1
                            continue

                        valid_flag_count += 1
                        # translate into 0-based space
                        entry_start = int(entry_parts[3]) - 1
                        entry_length = sum(
                            int(length)
                            for length in matched_lengths(entry_parts[5]))
                        entry_end = entry_start + entry_length

                        chrom_array[entry_start:entry_end] += 1.0

                        num_tags += 1
                        num_bases += entry_length
            else:
                rr.dieOnCritical('Samtools view failed; Sorted? Indexed?',
                                 returncode)
        finally:
            # clean up temp chrom file, also when parsing or samtools failed
            if os.path.exists(chrom_fn):
                os.remove(chrom_fn)

        get_roi_scores_from_chrom(chrom_array, chrom_key, all_rois,
                                  roi_ids_by_chrom)
        chrom_array[:] = 0

    rr.addInfo('Number of BAM records evaluated', bam_lines_seen)
    rr.addInfo('Number of valid BAM records seen', valid_flag_count)
    rr.addInfo('Number of invalid BAM records seen', invalid_flag_count)

    ROIs = list(all_rois.values())
    return ROIs, num_tags, num_bases, mapped_tags
Ejemplo n.º 10
0
def read_BED(bedfile_path, ROIs, chr_prefix='', chrom_size=300000000, ui=None):
    """
        Read BED entries into chromosome array. Slice into each ROI.

        BED entries are 0-offset for start, 1-offset for end - same as Python.

        Each entry with more than three tab-separated columns adds 1.0 to
        every base it covers in a per-chromosome numpy array. When the
        chromosome changes the array is passed to
        get_roi_scores_from_chrom() for the chromosome just finished and
        then reset.

        Arguments:
            bedfile_path: path of the BED file; a '.gz' suffix selects gzip.
            ROIs: iterable of objects exposing .chrom and .unique_id.
            chr_prefix: literal prefix removed from the start of each
                chromosome name.
            chrom_size: length of the per-chromosome counts array.
            ui: optional object with display(msg=..., progress=...);
                progress reporting is skipped when it is None.

        NOTE(review): the visible end of this function neither flushes the
        final chromosome nor returns anything, although the sibling readers
        do both - confirm whether the tail of the function is missing.
    """

    rr = RunRecord('read_BED')
    num_tags = 0
    num_bases = 0

    # one test for both opening and line counting, so they cannot disagree
    is_gzipped = bedfile_path.endswith('.gz')
    try:
        if is_gzipped:
            bed_data = GzipFile(bedfile_path, 'rb')
        else:
            bed_data = open(bedfile_path, 'r')
    except IOError:
        rr.dieOnCritical('No BED file found', bedfile_path)

    # get total lines in file for pacing the progress bar, but
    # this is also the total number of mapped reads for normalisation
    if is_gzipped:
        command = 'gunzip -c ' + bedfile_path + ' | wc -l'
    else:
        command = 'wc -l ' + bedfile_path

    returncode, stdout, stderr = run_command(command)
    if returncode:
        rr.addWarning('could not run wc to count BED lines', 'error')
        total_BED_lines = 1
    else:
        total_BED_lines = int(stdout.strip().split(' ')[0])
        rr.addInfo('total lines in ' + bedfile_path, total_BED_lines)

    roi_ids_by_chrom = {}  # to save time in parsing positions
    all_rois = {}  # ROIs by uniqueID - converted back to list at end
    for roi in ROIs:
        roi_ids_by_chrom.setdefault(roi.chrom, []).append(roi.unique_id)
        all_rois[roi.unique_id] = roi

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom
    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)
    array_len = len(chrom_array)

    # wc -l reports 0 for a single line without a trailing newline, so never
    # divide by less than 1
    progress_total = float(max(total_BED_lines, 1))

    current_chrom = None
    for i, bed_entry in enumerate(bed_data):
        if ui is not None and i % 100 == 0:
            msg = 'Reading BED entries [' + str(i) +\
                  ' / ' + str(total_BED_lines) + ']'
            ui.display(msg=msg, progress=float(i) / progress_total)

        bed_parts = bed_entry.split('\t')
        if len(bed_parts) <= 3:
            continue  # some sort of track or browser line

        bed_chrom = str(bed_parts[0])
        # remove the literal prefix; str.lstrip() would strip any run of the
        # prefix's characters and can eat into the chromosome name itself
        if chr_prefix and bed_chrom.startswith(chr_prefix):
            bed_chrom = bed_chrom[len(chr_prefix):]

        # check if it's time to flush the chrom counts array
        if bed_chrom != current_chrom and current_chrom is not None:
            get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                      roi_ids_by_chrom)
            chrom_array[:] = 0

        current_chrom = bed_chrom
        try:
            bed_start = int(bed_parts[1].strip())
            bed_end = int(bed_parts[2].strip())
        except ValueError:
            rr.addWarning('BED file improperly formatted line', bed_entry)
            continue

        # The quality score column is deliberately not read: it was never
        # used, and indexing it crashed on valid 4-column BED lines.

        # A slice update past the end of the array is silently clipped by
        # numpy, so the bounds are checked explicitly to keep the warning
        if bed_end > array_len:
            rr.addWarning('position larger than expect chrom size',
                          bed_start)
        chrom_array[bed_start:bed_end] += 1.0

        num_bases += bed_end - bed_start
        num_tags += (bed_end - bed_start) / 75
Ejemplo n.º 11
0
def read_BEDgraph(bedgraph_path,
                  ROIs,
                  chr_prefix='',
                  chrom_size=300000000,
                  ui=None):
    """
        Map BEDgraph entries to ROIs. This is essential as data uploaded
        to GEO will be BEDgraphs - we need to be able to read our own data!

        BEDgraph positions are 0-relative. We will need to add 1 to the
        last position to make it the same as Python's 0-1 system.
        Since there is no way to know the actual number of tags used in
        creating a BEDgraph we will simply assume 75-bp reads that are
        fully mapped. This will likely be conservative for most data
        currently available.

        NOTE(review): the '+ 1' on the end coordinate treats the end as
        inclusive; the UCSC bedGraph description defines chromEnd as
        exclusive - confirm which convention the input files follow.

        Arguments:
            bedgraph_path: path of the BEDgraph file; a '.gz' suffix
                selects gzip.
            ROIs: iterable of objects exposing .chrom and .unique_id.
            chr_prefix: literal prefix removed from the start of each
                chromosome name.
            chrom_size: length of the per-chromosome counts array.
            ui: optional object with display(msg=..., progress=...);
                progress reporting is skipped when it is None.

        Returns (ROIs, num_tags, num_bases, total_tags): num_bases is the
        sum of span * score over all entries, num_tags is
        ceil(num_bases / 75) as an int and total_tags is num_bases / 75.
    """
    rr = RunRecord('read_BEDgraph')
    num_bases = 0

    # one test for both opening and line counting, so they cannot disagree
    is_gzipped = bedgraph_path.endswith('.gz')
    try:
        if is_gzipped:
            bed_graph = GzipFile(bedgraph_path, 'rb')
        else:
            bed_graph = open(bedgraph_path, 'r')
    except IOError:
        # report the path: the file object was never bound if open failed
        rr.dieOnCritical('No bedgraph file found', bedgraph_path)

    # get total lines in file for pacing the progress bar
    if is_gzipped:
        command = 'gunzip -c ' + bedgraph_path + ' | wc -l'
    else:
        command = 'wc -l ' + bedgraph_path

    returncode, stdout, stderr = run_command(command)
    if returncode:
        rr.addWarning('could not run wc to count BED lines', 'error')
        total_BEDgraph_lines = 1
    else:
        total_BEDgraph_lines = int(stdout.strip().split(' ')[0])
        rr.addInfo('total lines in ' + bedgraph_path, total_BEDgraph_lines)

    roi_ids_by_chrom = {}  # to save time in parsing positions
    all_rois = {}  # ROIs by uniqueID - converted back to list at end
    for roi in ROIs:
        roi_ids_by_chrom.setdefault(roi.chrom, []).append(roi.unique_id)
        all_rois[roi.unique_id] = roi

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom
    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)
    array_len = len(chrom_array)

    # wc -l reports 0 for a single line without a trailing newline, so never
    # divide by less than 1
    progress_total = float(max(total_BEDgraph_lines, 1))

    current_chrom = None
    for i, bed_entry in enumerate(bed_graph):
        if ui is not None and i % 100 == 0:
            msg = 'Reading BEDgraph entries [' + str(i) +\
                  ' / ' + str(total_BEDgraph_lines) + ']'
            ui.display(msg=msg, progress=float(i) / progress_total)

        if bed_entry.lower().startswith('track'):
            continue

        # check formatting with tabs first, then space
        bed_parts = bed_entry.split('\t')
        if len(bed_parts) != 4:
            bed_parts = bed_entry.split(' ')
            if len(bed_parts) != 4:
                continue  # some sort of track or browser line

        bed_chrom = str(bed_parts[0])
        # remove the literal prefix; str.lstrip() would strip any run of the
        # prefix's characters and can eat into the chromosome name itself
        if chr_prefix and bed_chrom.startswith(chr_prefix):
            bed_chrom = bed_chrom[len(chr_prefix):]

        # check if it's time to flush the chrom counts array
        if bed_chrom != current_chrom and current_chrom is not None:
            get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                      roi_ids_by_chrom)
            chrom_array[:] = 0

        current_chrom = bed_chrom
        try:
            bed_start = int(bed_parts[1].strip())
            bed_end = int(bed_parts[2].strip()) + 1  # slice offset
            # score = number of reads at this location
            bed_score = float(bed_parts[3].strip())
        except ValueError:
            rr.addWarning('BED file improperly formatted line', bed_entry)
            continue

        # A slice update past the end of the array is silently clipped by
        # numpy, so the bounds are checked explicitly to keep the warning
        if bed_end > array_len:
            rr.addWarning('position larger than expect chrom size',
                          bed_start)
        chrom_array[bed_start:bed_end] += bed_score
        num_bases += (bed_end - bed_start) * bed_score

    # Clear last entries
    if current_chrom is not None:
        get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                  roi_ids_by_chrom)

    # We estimate that each read is 75 after trimming.
    num_tags = int(ceil(num_bases / 75))
    # Estimated total tags is total_bases / 75
    total_tags = num_bases / 75

    ROIs = list(all_rois.values())
    return ROIs, num_tags, num_bases, total_tags
Ejemplo n.º 12
0
def read_wiggle(wiggle_path,
                ROIs,
                chr_prefix='',
                chrom_size=300000000,
                ui=None):
    """
        Wiggles are horrible to read from as they have either fixed span
        scores per line or variable position and score per line. It's hard
        to 'bundle up' many lines before considerable CPU needs to be done
        to tie line against region.

        There is NO way to estimate the number of read tags that were used
        in the original study. We will make bases = total_score * span.
        Tags = bases/75.

        Arguments:
            wiggle_path: path of the wiggle file; a '.gz' suffix selects
                gzip.
            ROIs: iterable of objects exposing .chrom and .unique_id.
            chr_prefix: chromosome-name prefix, combined with 'chrom=' when
                trimming the chromosome field of a declaration line.
            chrom_size: length of the per-chromosome scores array.
            ui: optional object with display(msg=..., progress=...);
                progress reporting is skipped when it is None.

        Returns (ROIs, num_tags, num_bases, mapped_tags): num_bases is the
        sum of all values written into the chromosome arrays and both
        num_tags and mapped_tags are num_bases / 75.
    """
    rr = RunRecord('read_wiggle')
    total_score = 0  # used to calculate normalisation factors
    roi_ids_by_chrom = {}  # to save time in parsing positions
    all_rois = {}  # ROIs by uniqueID - converted back to list at end
    for roi in ROIs:
        roi_ids_by_chrom.setdefault(roi.chrom, []).append(roi.unique_id)
        all_rois[roi.unique_id] = roi

    is_gzipped = wiggle_path.endswith('.gz')
    try:
        if is_gzipped:
            wig_file = GzipFile(wiggle_path, 'rb')
        else:
            wig_file = open(wiggle_path, 'r')
    except IOError:
        rr.dieOnCritical('Could not open file', wiggle_path)

    # get total lines in wig for pacing the progress bar
    if is_gzipped:
        command = 'gunzip -c ' + wiggle_path + ' | wc -l'
    else:
        command = 'wc -l ' + wiggle_path

    returncode, stdout, stderr = run_command(command)
    if returncode:
        rr.addWarning('could not run wc to count WIG lines', 'error')
        total_lines = 1
    else:
        total_lines = int(stdout.strip().split(' ')[0])
        rr.addInfo('total lines in ' + wiggle_path, total_lines)

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom
    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)
    array_len = len(chrom_array)

    # wc -l reports 0 for a single line without a trailing newline, so never
    # divide by less than 1
    progress_total = float(max(total_lines, 1))

    current_chrom = None
    step_type = None  # stays None until a declaration line has been seen
    for i, line in enumerate(wig_file):
        if ui is not None and i % 100 == 0:
            msg = 'Reading wiggle entries [' + str(i) +\
                  ' / ' + str(total_lines) + ']'
            ui.display(msg=msg, progress=float(i) / progress_total)

        if line.startswith('track'):
            continue
        elif line.startswith('fixed'):
            # fixedStep chrom=chr10 start=56001 step=20 span=20
            step_type = 'fixed'
            step_parts = line.split(' ')
            # NOTE: str.strip(chars) removes a *set of characters* from both
            # ends, not a literal prefix. That is kept on purpose: with the
            # default empty chr_prefix it is what turns 'chrom=chr10' into
            # '10'. Numeric fields are safe as digits are never in the set.
            step = [val.strip('step=').strip()\
                    for val in step_parts if val.startswith('step')][0]
            span = [val.strip('span=').strip()\
                    for val in step_parts if val.startswith('span')][0]
            chrom = [val.strip('chrom='+chr_prefix).strip()\
                     for val in step_parts if val.startswith('chrom')][0]

            if chrom == 'M':
                chrom = 'MT'

            if current_chrom is None:
                current_chrom = chrom
            elif current_chrom != chrom:  # Empty chrom_array into genes
                get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                          roi_ids_by_chrom)
                current_chrom = chrom
                total_score += numpy.sum(chrom_array)
                chrom_array[:] = 0

            start = [val.strip('start=').strip()\
                     for val in step_parts if val.startswith('start')][0]
            pos = int(start)
            step = int(step)
            span = int(span)
        elif line.startswith('variable'):
            step_type = 'variable'
            step_parts = line.split(' ')
            chrom = [val.strip('chrom='+chr_prefix).strip()\
                     for val in step_parts if val.startswith('chrom')][0]

            if chrom == 'M':
                chrom = 'MT'

            if current_chrom is None:
                current_chrom = chrom
            elif current_chrom != chrom:  # Empty chrom_array into genes
                get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                          roi_ids_by_chrom)
                current_chrom = chrom
                total_score += numpy.sum(chrom_array)
                chrom_array[:] = 0
        elif step_type == 'fixed':
            value = float(line.strip()) / span
            # A slice assignment past the end of the array is silently
            # clipped by numpy, so check the bounds to keep the warning
            if pos + span > array_len:
                rr.addWarning('position larger than expect chrom size',
                              pos)
            chrom_array[pos:pos + span] = value
            pos += step
        elif step_type == 'variable':
            if '\t' in line:
                line_parts = line.split('\t')
            else:
                line_parts = line.split(' ')
            pos = int(line_parts[0])
            try:
                chrom_array[pos] = float(line_parts[1].strip())
            except IndexError:
                rr.addWarning('position larger than expect chrom size',
                              pos)
        # else: a line ahead of the first declaration has no chromosome or
        # step type to attach it to, so it is skipped

    # empty chrom_array into genes_score from the final section
    # (nothing to flush if the file held no declaration line)
    if current_chrom is not None:
        get_roi_scores_from_chrom(chrom_array, current_chrom, all_rois,
                                  roi_ids_by_chrom)
    total_score += numpy.sum(chrom_array)

    ROIs = list(all_rois.values())
    num_tags = total_score / 75
    num_bases = total_score
    mapped_tags = num_tags

    return ROIs, num_tags, num_bases, mapped_tags
Ejemplo n.º 13
0
def main(ui=None):
    """
        Convert a wiggle (WIG) file into a per-gene expression table.

        1) Get all gene entries from the DB (db_query.get_gene_entries).
        2) Read WIG file into a per-chromosome array and, each time a
            chromosome is finished, pass the array to
            get_gene_scores_from_chrom() for that chromosome's genes
        3) Write out genes and expr values as a tab-separated file with a
            'gene', 'exp' header

        The output path is args.exp when given; otherwise it is the WIG
        path with its '.gz' suffix and extension replaced by '.exp'.

        ui is an optional object with display(msg=..., progress=...);
        progress reporting is skipped when it is None.
    """
    rr = RunRecord('expr_wig_to_exp')
    rr.addCommands(sys.argv)

    args = script_info['args'].parse(window_title='Expression WIG to EXP')
    chrom_size = args.max_chrom_size
    prefix = args.chr_prefix

    session = db_query.make_session(args.db_path)
    genes = db_query.get_gene_entries(session)

    all_genes = {} # genes indexed by ensembl_id
    genes_by_chrom = {} # chrom: list(gene_id)
    genes_scores = {} # each gene has an expression score
    for gene in genes:
        genes_by_chrom.setdefault(gene.chrom, []).append(gene.ensembl_id)
        genes_scores[gene.ensembl_id] = 0
        all_genes[gene.ensembl_id] = gene

    wig_fn = args.wig
    is_gzipped = wig_fn.endswith('.gz')
    try:
        if is_gzipped:
            wig_file = gzip.GzipFile(wig_fn, 'rb')
        else:
            wig_file = open(wig_fn, 'r')
    except IOError:
        rr.dieOnCritical('Could not open file', wig_fn)

    # get total lines in wig for pacing the progress bar; gzipped input is
    # counted too, otherwise total_lines would be undefined in the loop
    if is_gzipped:
        command = 'gunzip -c ' + wig_fn + ' | wc -l'
    else:
        command = 'wc -l ' + wig_fn
    returncode, stdout, stderr = run_command(command)
    if returncode:
        rr.addWarning('could not run wc to count WIG lines', 'error')
        total_lines = 1
    else:
        total_lines = int(stdout.strip().split(' ')[0])
        rr.addInfo('total lines in '+wig_fn, total_lines)

    # wc -l reports 0 for a single line without a trailing newline, so never
    # divide by less than 1
    progress_total = float(max(total_lines, 1))

    # Read each piece of the file into an artificial chromosome (Numpy array)
    # and slice out the gene regions that we have for each gene in that chrom

    chrom_array = numpy.zeros(chrom_size, dtype=numpy.float32)

    current_chrom = None
    step_type = None  # stays None until a declaration line has been seen
    for i, line in enumerate(wig_file):
        if ui is not None and i % 100 == 0:
            msg = 'Reading wiggle entries [' + str(i) +\
                  ' / ' + str(total_lines) + ']'
            ui.display(msg=msg, progress=float(i) / progress_total)

        if line.startswith('track'):
            continue
        elif line.startswith('fixed'):
            # fixedStep chrom=chr10 start=56001 step=20 span=20
            step_type = 'fixed'
            step_parts = line.split(' ')
            # NOTE: str.strip(chars) removes a *set of characters* from both
            # ends, not a literal prefix. That is kept on purpose: with an
            # empty prefix it is what turns 'chrom=chr10' into '10'.
            step = [val.strip('step=').strip() \
                    for val in step_parts if val.startswith('step')][0]
            span = [val.strip('span=').strip() \
                    for val in step_parts if val.startswith('span')][0]
            chrom = [val.strip('chrom='+prefix).strip() \
                     for val in step_parts if val.startswith('chrom')][0]

            if chrom == 'M':
                chrom = 'MT'

            if current_chrom is None:
                current_chrom = chrom
            elif current_chrom != chrom: # Empty chrom_array into genes
                # the array holds the chromosome just finished, so it must
                # be scored against current_chrom, not the new one
                get_gene_scores_from_chrom(chrom_array, current_chrom,
                        all_genes, genes_by_chrom, genes_scores)
                current_chrom = chrom
                chrom_array[:] = 0

            start = [val.strip('start=').strip() \
                     for val in step_parts if val.startswith('start')][0]
            pos = int(start)
            step = int(step)
            span = int(span)
        elif line.startswith('variable'):
            step_type = 'variable'
            step_parts = line.split(' ')
            chrom = [val.strip('chrom='+prefix).strip() \
                    for val in step_parts if val.startswith('chrom')][0]

            if chrom == 'M':
                chrom = 'MT'

            if current_chrom is None:
                current_chrom = chrom
            elif current_chrom != chrom: # Empty chrom_array into genes
                # as above: flush against the chromosome just finished
                get_gene_scores_from_chrom(chrom_array, current_chrom,
                        all_genes, genes_by_chrom, genes_scores)
                current_chrom = chrom
                chrom_array[:] = 0
        elif step_type == 'fixed':
            chrom_array[pos] = float(line.strip())
            pos += step
        elif step_type == 'variable':
            if '\t' in line:
                line_parts = line.split('\t')
            else:
                line_parts = line.split(' ')
            chrom_array[int(line_parts[0])] = float(line_parts[1].strip())
        # else: a line ahead of the first declaration has no chromosome or
        # step type to attach it to, so it is skipped

    # empty chrom_array into genes_score from the final section
    # (nothing to flush if the file held no declaration line)
    if current_chrom is not None:
        get_gene_scores_from_chrom(chrom_array, current_chrom, all_genes,
                genes_by_chrom, genes_scores)

    # output genes and scores
    if args.exp:
        out_fn = args.exp
    else:
        base_fn = wig_fn
        if is_gzipped:
            base_fn = base_fn[:-len('.gz')]
        # cut off wig extension; a name without any '.' is kept whole
        out_fn = base_fn.rsplit('.', 1)[0]
        out_fn += '.exp' # add .exp extension

    # the with-block closes the file, no explicit close() needed
    with open(out_fn, 'w') as out:
        out.write('gene\texp\n') # header
        for gene_id in genes_scores.keys():
            out.write(gene_id + '\t' + str(genes_scores[gene_id]) + '\n')