Beispiel #1
0
 def listTable(self, inPath):
     """Print a one-line summary of every entry in the TAB file at inPath.

     After a fixed column-title line, each entry is printed as its running
     index, a compression marker ('Y' when entry['compressed'] is truthy,
     '-' otherwise), its flags and its offset (both as uppercase hex).
     """
     table = TabFile(BinaryFile(inPath, 'rb'))
     print("Item C Flg Offset  (C=compressed?)")
     index = 0
     for item in table.getEntries():
         marker = 'Y' if item['compressed'] else '-'
         printf("%5d: %s  %02X %06X\n", index, marker,
             item['flags'], item['offset'])
         index += 1
Beispiel #2
0
 def __init__(self, dimensions):
     """Record the tile dimensions, derive the image paths and call setup().

     dimensions is indexed as (dimX, dimY); both values are formatted
     with %d to build the per-size image directory name.
     """
     self.dimensions = dimensions
     self.file = TabFile('input/tile_info.txt')
     self.tile_map = {}
     self.tile_img_ext = '.png'
     self.tile_img_maindir = 'input/images/'
     dim_x = self.dimensions[0]
     dim_y = self.dimensions[1]
     self.tile_img_dimname = '%d_%d' % (dim_x, dim_y)
     # Target directory is "<maindir><dimX>_<dimY>/".
     self.tile_img_targetdir = '%s%s/' % (self.tile_img_maindir, self.tile_img_dimname)
     self.setup()
Beispiel #3
0
 def __init__(self, filename):
     """Initialise the TabFile base with filename, then call self.get().

     Before get() runs, the instance receives empty mark containers and
     the lookup built by get_dict(omsas).
     """
     TabFile.__init__(self, filename)
     # Containers start empty; presumably get() fills them -- confirm
     # against its definition.
     self.all_marks = []
     self.marks_by_year = {}
     self.gpa_4 = get_dict(omsas)
     self.get()
Beispiel #4
0
class TileMaster:
    """Map tile symbols to tile descriptions read from 'input/tile_info.txt'.

    On construction the tab file is parsed into self.tile_map, a dict that
    maps each entry of SYMBOLS (in file order) to a dict of column header ->
    value for one tile.
    """

    def __init__(self, dimensions):
        """Record the tile dimensions, derive the image paths and run setup().

        dimensions is indexed as (dimX, dimY).
        """
        self.dimensions = dimensions
        self.file = TabFile('input/tile_info.txt')
        self.tile_map = {}
        self.tile_img_ext = '.png'
        self.tile_img_maindir = 'input/images/'
        self.tile_img_dimname = '%d_%d' % (self.dimensions[0], self.dimensions[1])
        self.tile_img_targetdir = self.tile_img_maindir+self.tile_img_dimname+'/'
        self.setup()

    def setup(self):
        """Fill self.tile_map with one entry per non-blank line of the file.

        Every field of a line is reduced to its letters, digits and '-'
        characters, stored as an int when it parses as one and as a string
        otherwise, under the header of the same column index.

        Raises:
            ValueError: if a field contains none of the kept characters.
        """
        # Counts only non-blank lines, so blank lines do not consume a symbol.
        symbol_num = -1
        for line in self.file.parse():
            if not line or line.isspace():
                continue
            symbol_num += 1
            tile_info = {}
            fields = self.file.parse_tabline(line, skip_emptyinfo=True)
            for n, raw_info in enumerate(fields):
                info = ''.join(
                    char for char in raw_info
                    if char.isalpha() or char.isdigit() or char == '-'
                )
                if info == '':
                    raise ValueError('Line: "%s" provided empty info: "%s".' % (line, raw_info))
                header = self.file.header[n]
                try:
                    tile_info[header] = int(info)
                except ValueError:
                    # Not numeric: keep the cleaned text as-is.
                    tile_info[header] = info
            self.tile_map[SYMBOLS[symbol_num]] = tile_info

    def det_sealevel(self):
        """Return the 'maxE' value of the shore tile.

        Symbol '0' must be the tile named 'ocean' and symbol '1' the tile
        named 'shore'.

        Raises:
            ValueError: if either symbol maps to a differently named tile.
        """
        ocean = self.tile_map['0']
        if ocean['name'] != 'ocean':
            raise ValueError(
                "Tile '0' must be named 'ocean', got %r." % (ocean['name'],))
        shore = self.tile_map['1']
        if shore['name'] != 'shore':
            raise ValueError(
                "Tile '1' must be named 'shore', got %r." % (shore['name'],))
        return shore['maxE']

    def setup_images(self):
        """Attach an ImageTk.PhotoImage to every tile under the key 'image'.

        The source image is '<maindir>/<name><ext>'; the path actually
        opened is whatever check_if_resized returns for the target directory
        and the current dimensions.
        """
        for tile in self.tile_map.values():
            output_id = '%s.%s%s' % (tile['name'], self.tile_img_dimname, self.tile_img_ext)
            filename = os.path.join(self.tile_img_maindir, tile['name']+self.tile_img_ext)
            resized_path = check_if_resized(self.tile_img_targetdir, output_id, self.dimensions, filename)
            img = Image.open(resized_path)
            tile['image'] = ImageTk.PhotoImage(image=img)

    def choose_tile(self, elevation, temperature):
        """Pick a tile symbol whose ranges contain elevation and temperature.

        Every tile with minE <= elevation <= maxE and
        minT <= temperature <= maxT is a candidate, keyed by its symbol with
        its 'value' field as the weight handed to WeightedChoice.
        """
        # NOTE(review): behaviour when no tile matches depends on how
        # WeightedChoice treats an empty dict -- confirm.
        possibilities = {
            key: tile['value']
            for key, tile in self.tile_map.items()
            if tile['minE'] <= elevation <= tile['maxE']
            and tile['minT'] <= temperature <= tile['maxT']
        }
        wc = WeightedChoice(possibilities)
        return wc.get()
def find_sites(
    peaks_file,
    fasta_file,
    motif,
    bed=True,
    xls=False,
    output_dir=None,
    motif_type="MEME",
    src_fnc="find_sites",
    bysummit=False,
    **kwargs
):
    """
    Search the sequence of every peak for motif sites and write the results.

    Peaks are read from peaks_file and paired, in order, with the records of
    fasta_file (one sequence per peak). Each pair is handed to search_peak,
    whose four results are written to four tab-separated output files named
    after the peaks file (extension stripped, placed in output_dir if given):

        NAME.peaks.info, NAME.peaks.bed, NAME.sites.info, NAME.sites.bed

    A summary is also written to NAME.log.

    Per the original author's notes, all outputs are meant to be 0-based,
    half-open (BED convention), with scores being tag density (peaks) and
    information content (sites). The row contents are produced by
    search_peak, which is not defined in this function.

    Header columns written to NAME.peaks.info:
        chr, start, end, peak_ID, peak_intensity, site_count, total_Ri,
        best_Ri, best_seq, best_offset, best_strand, clean_peak_length,
        peak_summit, peak_misc

    Header columns written to NAME.sites.info:
        chr, start, end, site_ID, Ri, strand, offset, motif_seq, peak_ID,
        peak_length, reported_peak_length, peak_summit

    Args:
        peaks_file: path of the peaks file.
        fasta_file: path of the FASTA file holding one sequence per peak.
        motif: a motif object, or a path (str) read with Bio.Motif.read.
        bed: when truthy, read peaks_file with BedFile. Checked first, so
            with the default bed=True the xls flag has no effect.
        xls: when truthy (and bed is falsy), read peaks_file with MacsFile.
        output_dir: directory for the output files; None keeps the bare
            file-name prefix.
        motif_type: format name passed to Bio.Motif.read.
        src_fnc: name reported in the "generated by" header comment.
        bysummit: forwarded to search_peak.
        **kwargs: accepted for call compatibility and ignored.

    Returns:
        The one-line summary of how many peaks had no sites (the log file
        additionally contains the motif).

    Raises:
        ValueError: if both bed and xls are falsy.
        StopIteration: if fasta_file has fewer records than there are peaks.
    """

    if isinstance(motif, str):
        with open(motif) as motif_handle:
            motif = Bio.Motif.read(motif_handle, motif_type)

    # start the output files
    prefix = os.path.splitext(os.path.basename(peaks_file))[0]
    if output_dir is not None:
        prefix = os.path.join(output_dir, prefix)
    sites_info = TabFile(os.extsep.join([prefix, "sites", "info"]), "w")
    sites_bed = TabFile(os.extsep.join([prefix, "sites", "bed"]), "w")
    peaks_info = TabFile(os.extsep.join([prefix, "peaks", "info"]), "w")
    peaks_bed = TabFile(os.extsep.join([prefix, "peaks", "bed"]), "w")

    peaks_cols = [
        "chr",
        "start",
        "end",
        "peak_ID",
        "peak_intensity",
        "site_count",
        "total_Ri",
        "best_Ri",
        "best_seq",
        "best_offset",
        "best_strand",
        "clean_peak_length",
        "peak_summit",
        "peak_misc",
    ]
    sites_cols = [
        "chr",
        "start",
        "end",
        "site_ID",
        "Ri",
        "strand",
        "offset",
        "motif_seq",
        "peak_ID",
        "peak_length",
        "reported_peak_length",
        "peak_summit",
    ]

    nosites = 0
    peaknumber = 0
    fasta_handle = None
    try:
        peaks_msg = os.linesep.join(
            [
                "# This file was generated by " + src_fnc,
                "# comments are retained from original file",
                "\t".join(peaks_cols),
                "",
            ]
        )
        peaks_info.write(peaks_msg)
        sites_msg = os.linesep.join(
            ["# This file was generated by " + src_fnc, "\t".join(sites_cols), ""]
        )
        sites_info.write(sites_msg)

        if bed:
            peak_generator = BedFile(peaks_file)
        elif xls:
            peak_generator = MacsFile(peaks_file)
        else:
            raise ValueError("Neither bed nor xls")

        # NOTE(review): the "U" mode flag is rejected by Python >= 3.11;
        # kept unchanged so older interpreters still get universal newlines.
        fasta_handle = open(fasta_file, "rU")
        # Lazy: one sequence is pulled per peak, so the handle must stay
        # open for the whole loop.
        peak_seqs = (r.seq for r in Bio.SeqIO.parse(fasta_handle, "fasta"))

        for peaknumber, peak in enumerate(peak_generator, 1):
            # Built-in next() works on every Python version, unlike .next().
            seq = next(peak_seqs)
            # Generate a peak ID
            try:
                peak_ID = peak.name()
            except NameError:
                # NOTE(review): NameError is what the original catches; a
                # missing name() method would raise AttributeError instead.
                # Confirm which one the peak classes actually raise.
                peak_ID = "{!s}_{!s}".format(prefix, peaknumber)
            (peak_info, peak_bed, sites_info_rows, sites_bed_rows) = search_peak(
                peak_ID, peak, seq, motif, bysummit=bysummit
            )
            peaks_info.write_row(peak_info)
            peaks_bed.write_row(peak_bed)
            sites_info.write_rows(sites_info_rows)
            sites_bed.write_rows(sites_bed_rows)
            if len(sites_info_rows) == 0:
                nosites += 1
    finally:
        # Release every handle even when a peak fails part-way through.
        if fasta_handle is not None:
            fasta_handle.close()
        for out_file in (sites_info, sites_bed, peaks_info, peaks_bed):
            out_file.close()

    message = (
        "There were {!s} of {!s} peaks with no identifiable "
        "sites in {!s} using a cutoff of 0 bits"
    ).format(nosites, peaknumber, fasta_file)
    stdout_buffer = message

    # Describe the motif: one line per position of its log-odds matrix, or
    # its plain string form when that interface is unavailable.
    try:
        motif_str = os.linesep.join(
            [
                ", ".join(["".join([str(base), ": ", str(odds)]) for base, odds in position.items()])
                for position in motif.log_odds()
            ]
        )
    except AttributeError:
        motif_str = str(motif)
    message = os.linesep.join([message, "The following motif was used", motif_str])

    # write the full message (summary + motif) to the log
    with open(prefix + ".log", "w") as log_file:
        log_file.write(message)
    return stdout_buffer