예제 #1
0
    def fixup(self):
        """
        Trim every row at its first '#' cell and pad rows to a common width.

        For each row in self.rows:
          * if the row contains a '#' cell, everything from that cell to the
            right is dropped;
          * otherwise trailing empty-string cells are dropped.
        Afterwards every row shorter than the widest row is padded in place
        with '' cells so that all rows have the same length.

        Raises:
            FileError: if a row's very first cell is '#', or if every row
                ends up empty (zero-width blueprint).
        """
        maxwidth = 0

        for i, cells in enumerate(self.rows):
            # Only the "no '#' in this row" case is handled here. The lookup
            # is kept separate from the validation below so that the
            # FileError raised for a misplaced '#' is not swallowed.
            try:
                endat = cells.index('#')  # first '#' acts as the row ender
            except ValueError:
                endat = None

            if endat is None:
                # no row ender: trim off empty cells at end of row
                while cells and cells[-1] == '':
                    cells = cells[:-1]
            elif endat == 0:
                raise FileError("Blueprint has '#' in unexpected cell.")
            else:
                # trim off stuff from the found '#' to the right
                cells = cells[:endat]

            self.rows[i] = cells
            maxwidth = max(maxwidth, len(cells))

        if maxwidth == 0:
            raise FileError("Blueprint appears to be empty or zero-width.")

        # Conform all rows to the same width
        for row in self.rows:
            shortfall = maxwidth - len(row)
            if shortfall > 0:
                row.extend([''] * shortfall)
예제 #2
0
def read_sheet(filename, sheetid):
    """
    Read the specified sheet from the specified file.

    The reader is chosen from the (lower-cased) file extension: .csv, .xls
    or .xlsx.  CSV files are considered a single sheet, so `sheetid` is only
    passed on to the xls/xlsx readers.

    Returns the lines produced by the reader, cut off just before the first
    line whose first cell is exactly '#'.

    Raises:
        FileError: if `filename` is not an existing file or has an
            unsupported extension.
    """

    # the file has to exist before we look at its type
    if not os.path.isfile(filename):
        raise FileError('File not found "%s"' % filename)

    # pick a reader based on the extension
    extension = os.path.splitext(filename)[1].lower()
    if extension == '.csv':
        lines = read_csv_file(filename)
    elif extension == '.xls':
        lines = xls.read_xls_file(filename, sheetid)
    elif extension == '.xlsx':
        lines = xlsx.read_xlsx_file(filename, sheetid)
    else:
        raise FileError(
            "Invalid file type '%s' (csv, xls, xlsx accepted)" % extension)

    # a line starting with a '#' cell marks the end of the blueprint:
    # keep only what comes before the first such line
    cutoff = next(
        (index for index, row in enumerate(lines) if row and row[0] == '#'),
        None)
    if cutoff is not None:
        lines = lines[:cutoff]

    return lines
예제 #3
0
def read_xlsx_file(filename, sheetid):
    """
    Read contents of specified sheet in Excel 2007 (.xlsx) workbook file.
    .xlsx files are actually zip files containing xml files.

    Args:
        filename: path of the .xlsx workbook.
        sheetid: 0-based worksheet id, or None for the first worksheet.

    Returns whatever extract_xlsx_lines() builds from the worksheet rows and
    the shared strings (documented upstream as a 2d list of cell values).

    Raises:
        FileError: if the workbook cannot be opened, the worksheet xml cannot
            be read/parsed, or xl/sharedStrings.xml cannot be read/parsed.
    """

    # sheets are numbered starting from 1 in xlsx files
    sheet_number = 1 if sheetid is None else sheetid + 1
    sheet_error = "Could not read xlsx file %s, worksheet id %s" % (
        filename, sheet_number - 1)

    try:
        zf = zipfile.ZipFile(filename)
    except Exception:
        raise FileError(sheet_error)

    # The archive is always closed once both xml members have been read,
    # whether or not reading them succeeded.
    try:
        # Get cell data from specified worksheet.
        try:
            sheetdata = zf.read('xl/worksheets/sheet%s.xml' % sheet_number)
            rows = xml2obj(sheetdata).sheetData.row
        except Exception:
            raise FileError(sheet_error)

        # Get shared strings xml. Cell values are given as ordinal index
        # references into sharedStrings.xml:ssi.si elements, whose
        # string-value is found in the node's .t element.
        try:
            stringdata = zf.read('xl/sharedStrings.xml')
            strings = xml2obj(stringdata).si
        except Exception:
            raise FileError("Could not parse sharedStrings.xml of xlsx file")
    finally:
        zf.close()

    # Map strings to row values and return result
    return extract_xlsx_lines(rows, strings)
예제 #4
0
def load_aliases(filename):
    """
    Loads aliases.txt-formatted file and returns a dict.

    Accepted formats:
        aliasname,keystrokes    (QF1.x style)
        aliasname:keystrokes    (QF2.x style)

    Empty lines and lines starting with '#' are ignored, as are lines that
    match neither format.  When an alias name occurs more than once the last
    occurrence wins.

    Returns:
        dict mapping alias name -> keystrokes string.

    Raises:
        FileError: if the file cannot be opened or read.
    """
    # load the file contents; `except Exception` (rather than a bare except)
    # lets KeyboardInterrupt / SystemExit propagate untouched
    try:
        with open(filename) as f:
            data = f.read()
    except Exception:
        raise FileError("Could not open aliases file " + filename)

    data = util.convert_line_endings(data)

    # compiled once instead of being looked up for every line
    alias_pattern = re.compile(r'([\w\d]+)(,|:) *(.+)\s*\n*')

    aliases = {}
    for line in data.split('\n'):
        # skip comment and empty lines
        if line == '' or line[0] == '#':
            continue

        # break into {aliasname:keystrokes} pairs
        match = alias_pattern.match(line)
        if match is not None:
            aliases[match.group(1)] = match.group(3)

    return aliases
예제 #5
0
def parse_file(filename, sheetid):
    """
    Parse the specified file/sheet into FileLayers and associated
    other bits of information.

    The first row is treated as the sheet's "top line" when its first cell
    starts with '#'; otherwise the sheet is assumed to be a '#dig' sheet and
    every row is treated as blueprint content.

    Returns:
        (filelayers, details): the layers produced by split_zlayers() after
        fixup()/clean_cells(), and the result of parse_sheet_details() for
        the top line.

    Raises:
        FileError: if the sheet contains no lines (read_sheet() and the
            layer fixup may raise it as well).
    """

    # read lines in
    lines = read_sheet(filename, sheetid)

    # raise error on completely empty sheets
    if not lines:
        raise FileError("Sheet is empty, file %s, worksheet id %s" %
                        (filename, sheetid))

    # break into the lines we want; the extra `first_row` check keeps a
    # first row with no cells at all from raising IndexError
    first_row = lines[0]
    if first_row and first_row[0] and first_row[0][0] == '#':
        top_line = ','.join(first_row)
        lines = lines[1:]
    else:
        # top line missing, assume #dig
        top_line = '#dig'

    # parse top line details
    details = parse_sheet_details(top_line)

    # break up lines into z-layers separated by #> or #<
    filelayers = split_zlayers(lines)

    # tidy up the layers
    for fl in filelayers:
        fl.fixup()
        fl.clean_cells()

    return filelayers, details
예제 #6
0
def read_xlsx_sheet_names(filename):
    """
    Get a list of sheets and their ids from xlsx file.

    Returns:
        list of (sheet name, sheet id) tuples, where the id is the number in
        the sheet's 'rId<N>' relationship id minus one.

    Raises:
        FileError: if xl/workbook.xml cannot be read or parsed, or if a
            sheet's relationship id does not start with 'rId<digits>'.
    """
    try:
        zf = zipfile.ZipFile(filename)
        try:
            sheetsdata = zf.read('xl/workbook.xml')
        finally:
            # don't leak the archive handle, whatever happens while reading
            zf.close()
        sheets = xml2obj(sheetsdata).sheets.sheet
    except Exception:
        raise FileError("Could not open '%s' for sheet listing." % filename)

    output = []
    for sheet in sheets:
        # presumably r_id is how xml2obj exposes the r:id attribute -- confirm
        m = re.match(r'rId(\d+)', sheet.r_id)
        if not m:
            raise FileError("Could not read list of xlsx's worksheets.")
        output.append((sheet.name, int(m.group(1)) - 1))
    return output
예제 #7
0
    def __init__(self, supplied_binary='', tmpdir='/tmp'):
        """
        Store the binary to use and the temporary directory.

        `supplied_binary` is used when can_locate() accepts it; otherwise the
        result of locate_file('gtp.jar', 'GTP_PATH', local_dir) is used.
        `tmpdir` is stored with trailing '/' characters removed.

        Raises:
            FileError: (carrying `supplied_binary`) if no binary was found.
        """
        if can_locate(supplied_binary):
            chosen = supplied_binary
        else:
            # fall back to the default gtp.jar lookup
            chosen = locate_file('gtp.jar', 'GTP_PATH', local_dir)

        self.binary = chosen
        if chosen is None:
            raise FileError(supplied_binary)

        self.tmpdir = tmpdir.rstrip('/')
예제 #8
0
    def load_phyml_results(
        self,
        tree_file,
        stats_file,
        name=None,
        program='phyml',
        ):
        """
        Check that the phyml tree file and stats file both exist.

        For every path that is not an existing file, a FileError built from
        that path is printed and the local `exit` flag is set.
           - returns None

        NOTE(review): only the existence check is present in this excerpt;
        `name` and `program` are not used and `exit` is never read here.
        """

        exit = False
        for f in (tree_file, stats_file):
            if not os.path.isfile(f):
                # print(...) with one argument behaves the same on
                # Python 2 and Python 3
                print(FileError(f))
                exit = True
예제 #9
0
def delete(filename):
    """
    Remove `filename` from disk.

    Returns the result of os.remove() (None).  Raises FileError, carrying
    `filename`, when can_locate() does not accept the path.
    """
    if not can_locate(filename):
        raise FileError(filename)
    return os.remove(filename)
예제 #10
0
class Tree(object):

    """
    Class for storing the results of phylogenetic inference

    Attributes:
        newick:  newick string of the tree (or None)
        score:   score of the tree (a float log-likelihood when loaded via
                 load_phyml_results, a string when loaded via read_from_file)
        program: name of the program that produced the tree
        name:    label of the tree
        output:  raw text output of the program (phyml stats file contents)
        rooted:  result of check_rooted(newick): True, False or None
    """

    score_regex = re.compile(r'(?<=Log-likelihood: ).+')
    name_regex = \
        re.compile(r'([A-Za-z0-9\-_]+).([A-Za-z0-9\-_]+)(?=_phyml_)')

    # matches the numeric branch length that follows a ':' in a newick string
    _branch_length_regex = re.compile(r'(?<=:)[0-9.]+')

    # attributes compared, in this order, by __eq__
    _compared_attrs = ('name', 'newick', 'program', 'score', 'rooted',
                       'output')

    def __init__(
        self,
        newick=None,
        score=0,
        program=None,
        name=None,
        output=None,
        rooted=None,
        ):
        """
        Store the given values on the instance.

        The `rooted` argument is accepted but not used: self.rooted is
        always recomputed from `newick` with check_rooted().
        """

        self.newick = newick
        self.score = score
        self.program = program
        self.name = name
        self.output = output
        self.rooted = self.check_rooted(self.newick)

    def __str__(self):
        """
        Represents the object's information inside
        a newick comment, so is still interpretable
        by a (good) newick parser
        """

        parts = ['[Tree Object:\n']
        if self.name:
            parts.append('Name:\t' + self.name + '\n')
        parts.append('Program:\t{0}\n'.format(self.program))
        parts.append('Score:\t{0}\n'.format(self.score))
        parts.append('Rooted:\t{0}\n'.format(self.rooted))
        # the ']' closes the comment, so the newick itself stays parseable
        parts.append('Tree:\t]{0}\n'.format(self.newick))
        return ''.join(parts)

    def __eq__(self, other):
        """
        Two trees are equal when name, newick, program, score, rooted and
        output all compare equal.
        """
        return all(getattr(self, attr) == getattr(other, attr)
                   for attr in self._compared_attrs)

    def __ne__(self, other):
        """
        Inverse of __eq__ (Python 2 does not derive != from __eq__).
        """
        return not self == other

    def pam2sps(self, multiplier=0.01):
        """
        Scales branch lengths by an order of `multiplier` and returns the
        result as a new Tree; this object is left unchanged.

        Default is 0.01, converting PAM units to substitutions
        per site.
        multiplier = 'pam2sps' is the same as the default (0.01)
        multiplier = 'sps2pam' scales by 100, performing the
        opposite operation.
        multiplier = 'strip' removes branch lengths entirely
        (the lengths and every ':' in the newick string)
        """

        if multiplier == 'strip':
            without_lengths = self._branch_length_regex.sub('', self.newick)
            output_string = without_lengths.replace(':', '')
        else:
            if multiplier == 'pam2sps':
                factor = 0.01
            elif multiplier == 'sps2pam':
                factor = 100
            else:
                factor = multiplier

            def scale(match):
                return str(factor * float(match.group()))

            output_string = self._branch_length_regex.sub(scale,
                    self.newick)

        return Tree(
            output_string,
            self.score,
            self.program,
            self.name,
            self.output,
            self.rooted,
            )

    def read_from_file(self, infile, name=None):
        """
        This and the write_to_file function allow the class to be
        easily stored and reconstituted without using a pickle or
        JSON

        Reads 'Name:', 'Program:', 'Tree:' and 'Score:' lines (the format
        written by write_to_file with metadata=True) into this object.  The
        value is the second whitespace-separated token of the line with any
        ']' removed; the score is stored as a string.  A `name` argument
        takes precedence over a 'Name:' line.  Blank lines are skipped.

        Returns self, or None if a recognised line has no value.
        """

        self.name = name
        with open(infile) as reader:
            for raw_line in reader:
                fields = [token.rstrip().replace(']', '') for token in
                          raw_line.split()]
                if not fields:
                    # e.g. the empty line left behind by a newick string
                    # that already ended in a newline
                    continue
                try:
                    if not name and fields[0] == 'Name:':
                        self.name = fields[1]
                    elif fields[0] == 'Program:':
                        self.program = fields[1]
                    elif fields[0] == 'Tree:':
                        self.newick = fields[1]
                    elif fields[0] == 'Score:':
                        self.score = fields[1]
                except IndexError:
                    # recognised key without a value: give up
                    return None
        return self

    def write_to_file(
        self,
        outfile,
        metadata=False,
        suppress_NHX=False,
        ):
        """
        Writes a string representation of the object's contents
        to file. This can be read using read_from_file to
        reconstruct the Tree object, if metadata is included (i.e.
        metadata=True)

        Without metadata only the newick string is written, terminated by
        a newline; suppress_NHX=True drops a leading '[&R] ' marker first.

        Returns `outfile`.
        """

        with open(outfile, 'w') as writer:
            if metadata:
                writer.write(str(self))
            else:
                writeable = self.newick
                if suppress_NHX and writeable.startswith('[&R] '):
                    writeable = writeable[5:]
                if not writeable.endswith('\n'):
                    writeable += '\n'
                writer.write(writeable)
        return outfile

    @classmethod
    def check_rooted(cls, newick):
        """
        Returns True if the root of the tree described by `newick` has
        exactly two children, False otherwise, and None when `newick` is
        None or empty.
        """
        if newick is None or newick == '':
            return None
        t = dpy.Tree()
        t.read_from_string(newick, 'newick')
        root_degree = len(t.seed_node.child_nodes())
        return root_degree == 2

    @classmethod
    def deroot_tree(cls, newick):
        """
        Returns the newick string (ending in ';') obtained after calling
        deroot() on the tree parsed from `newick`.
        """
        t = dpy.Tree()
        t.read_from_string(newick, 'newick')
        t.deroot()
        return t.as_newick_string() + ';'

    def reroot_newick(self):
        """
        Returns the newick string (ending in ';' and a newline) obtained
        after calling resolve_polytomies() on the tree parsed from
        self.newick.  The object itself is not modified.
        """
        dpy_tree = dpy.Tree()
        dpy_tree.read_from_string(self.newick, 'newick')
        dpy_tree.resolve_polytomies()
        return dpy_tree.as_newick_string() + ';\n'

    def load_phyml_results(
        self,
        tree_file,
        stats_file,
        name=None,
        program='phyml',
        ):
        """
        Loads phyml results into existing tree object
           - returns None

        tree_file:  file holding the newick string
        stats_file: phyml stats file; its 'Log-likelihood: ' value becomes
                    the (float) score and its full text becomes `output`
        name:       tree name; when not given it is taken from the first
                    group of name_regex matched against `tree_file`
        program:    stored as the program name

        Raises FileError (after printing which paths are missing) if
        either file does not exist.
        """

        missing = [f for f in (tree_file, stats_file)
                   if not os.path.isfile(f)]
        for f in missing:
            # print(...) with one argument behaves the same on
            # Python 2 and Python 3
            print(FileError(f))

        if missing:
            print('Results were not loaded')
            raise FileError()

        if not name:
            name = self.name_regex.search(tree_file).group(1)
        with open(tree_file) as handle:
            newick = handle.read()
        with open(stats_file) as handle:
            stats = handle.read()
        score = float(self.score_regex.search(stats).group())

        self.program = program
        self.newick = newick
        self.output = stats
        self.score = score
        self.name = name
        self.rooted = self.check_rooted(newick)