Example #1
0
def nexus(args):  # pragma: no cover
    usage = """
    Convert a lexibank dataset to nexus.

        lexibank nexus <DATASET> --output=...
    """
    get_dataset(args)
    parser = argparse.ArgumentParser(prog='nexus', usage=usage)
    parser.add_argument('--output', help='Nexus output file', default=None)
    xargs = parser.parse_args(args.args[1:])

    writer = NexusWriter()

    if not xargs.output:
        print(writer.write())
    else:
        writer.write_to_file(filename=xargs.output)
Example #2
0
  def read_file(self):
  
    with open( self.input_file, 'r') as f:
      self.first_line = f.readline()
   
      # TODO: Needs work xread / tnt file format is loose

      # xread has some other potential clues
      xread_filename = f.readline().strip().replace("'", "")
      xread_matrix_dimensions = f.readline()
      lines = f.readlines()

      f.close

    if "#NEXUS" == self.first_line.strip():
      print "text file is nexus"
  
      # move to nexus folder
      filename, file_extension = os.path.splitext(self.input_file)
      os.rename(self.input_file, filename + ".nex")

      # send to correct matrix handler
      # TODO: A little dirty, probably should 
      # change mediator's handler param so this flows cleaner
      matrixHandler = NexusHandler( filename + ".nex" )
      return matrixHandler.read_file()

    elif "xread" == self.first_line.strip():
    
      print "xread format found"

      dimensions = str(xread_matrix_dimensions).split(' ')
      self.ncols = int(dimensions[0])
      self.nrows = int(dimensions[1])

      custom_block = "\n\nBEGIN VERIFIED_TAXA;\n"
      custom_block += "Dimensions ntax=" + str(self.nrows) + " nchar=4;\n"

      matrix = ""
      matrix_arr = []
      line_buffer = ""
      row_taxa = []
      for l in lines:
        if ";proc/;" != l.strip():

          if line_buffer:
            line_row = line_buffer + " " + l.strip()
          else:
            line_row = l.strip()
        
          if len(line_row) >= self.ncols:
 
            # reconstitute broken rows, then remove space/tabbing
            line_parts = line_row.split(' ')
            line_parts = list(filter(None, line_parts))

            taxon_name = line_parts[0]
            taxon_chars = line_parts[1]
            #taxon_chars = line_parts[1].replace("[", "(")
            #taxon_chars = taxon_chars.replace("]", ")")
          
            #  verify taxa
            verified_taxa = verifyTaxa(taxon_name)
            verified_name = None

            if verified_taxa:
              for taxa in verified_taxa:

                # We split here to exclude the odd citation on the taxon name ( maybe regex what looks like name & name, year would be better )
                verified_name = taxa['name_string'].lower().split(' ')
                row_taxa.append( verified_name[0] )

                custom_block += taxon_name + "    " + taxa['name_string'] + "    " + taxa['match_value'] + "    " + taxa['datasource'] + "\n"

              matrix += "    " + verified_name[0] + "    " + taxon_chars.strip() + "\n"
              matrix_arr.append(taxon_chars.strip())
            else:

              row_taxa.append( taxon_name )
              custom_block += taxon_name + "\n"
              matrix += "    " + taxon_name + "    " + taxon_chars.strip() + "\n"
              matrix_arr.append(taxon_chars.strip())

            line_buffer = ""
          else:
            line_buffer += l.strip()

      custom_block += ";\n"
      custom_block += "END;\n"

      self.custom_block = custom_block      

      print "matrix array"
      marr = []
      for row in matrix_arr:
        items = list(row)
        marr.append(items)

      m = numpy.matrix(marr)
       
      nw = NexusWriter()
      nw.add_comment("Morphobank generated Nexus from xread .txt file ")

      for rx in range(self.nrows):
        taxon_name = row_taxa[rx] 
        cell_value = m.item(rx)

        for cindex, cv in enumerate(cell_value):
          char_no = cindex + 1
          nw.add(taxon_name, char_no, cv)


      # keep the upload path and filename, but change extension
      file_parts = self.input_file.split('.')
      output_file = file_parts[0] + '.nex'

      nw.write_to_file(filename=output_file, interleave=False, charblock=True)

      # move to nexus folder
      #os.rename(xread_filename + ".nex", "./nexus/" + xread_filename + ".nex")

      # wait for file to move before open and append
      #while not os.path.exists('./nexus/' + xread_filename + '.nex'):
      #  time.sleep(1)
 
      #if os.path.isfile('./nexus/' + xread_filename + '.nex'):

      # Custom Block Section
      nexus_file = codecs.open(output_file, 'a', 'utf-8')
      nexus_file.write(custom_block)
      nexus_file.close()      

      return output_file

    else:
      print "do not know how to process this .txt file"
class Test_NexusWriter(unittest.TestCase):
    def setUp(self):
        self.nex = NexusWriter()
        for char in data:
            for taxon, value in data[char].items():
                self.nex.add(taxon, char, value)
    
    def test_char_adding1(self):
        """Test Character Addition 1"""
        assert self.nex.data['char1']['French'] == '1'
        assert self.nex.data['char1']['English'] == '2'
        assert self.nex.data['char1']['Latin'] == '3'

    def test_char_adding2(self):
        """Test Character Addition 2"""
        assert self.nex.data['char2']['French'] == '4'
        assert self.nex.data['char2']['English'] == '5'
        assert self.nex.data['char2']['Latin'] == '6'
    
    def test_char_adding_integer(self):
        """Test Character Addition as integer"""
        self.nex.add('French', 'char3', 9)
        self.nex.add('English', 'char3', '9')
        assert self.nex.data['char3']['French'] == '9'
        assert self.nex.data['char3']['French'] == '9'
    
    def test_characters(self):
        assert 'char1' in self.nex.characters
        assert 'char2' in self.nex.characters
        
    def test_taxa(self):
        assert 'French' in self.nex.taxa
        assert 'English' in self.nex.taxa
        assert 'Latin' in self.nex.taxa
    
    def test_remove(self):
        self.nex.remove("French", "char2")
        assert 'French' not in self.nex.data['char2']
        assert 'French' in self.nex.taxa
        
    def test_remove_character(self):
        self.nex.remove_character("char2")
        assert len(self.nex.characters) == 1
        assert 'char2' not in self.nex.data
        
    def test_remove_taxon(self):
        self.nex.remove_taxon("French")
        assert 'French' not in self.nex.taxa
        for char in self.nex.data:
            assert 'French' not in self.nex.data[char]
        n = self.nex.make_nexus(interleave=False)
        assert re.search("DIMENSIONS NTAX=2 NCHAR=2;", n)
        assert 'French' not in n
        
    def test_nexus_noninterleave(self):
        """Test Nexus Generation - Non-Interleaved"""
        n = self.nex.make_nexus(interleave=False)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_charblock(self):
        """Test Nexus Generation - with characters block"""
        n = self.nex.make_nexus(charblock=True)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("CHARSTATELABELS", n)
        assert re.search("1 char1,", n)
        assert re.search("2 char2", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'

    def test_nexus_interleave(self):
        """Test Nexus Generation - Interleaved"""
        n = self.nex.make_nexus(interleave=True)
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        # char1
        assert re.search("Latin\s+3", n)
        assert re.search("French\s+1", n)
        assert re.search("English\s+2", n)
        # char2
        assert re.search("Latin\s+6", n)
        assert re.search("French\s+4", n)
        assert re.search("English\s+5", n)

        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] == \
            'STANDARD'
        assert re.search("FORMAT.*(INTERLEAVE)", n).groups()[0] == \
            'INTERLEAVE'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] == \
            '123456'
            
    def test_polymorphic_characters(self):
        self.nex.add("French", "char1", 2)
        self.assertEqual(self.nex.data['char1']['French'], "12")
        n = self.nex.make_nexus(charblock=True)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)  # no change
        assert re.search("French\s+\(12\)4", n)
    
    def test_write_to_file(self):
        tmp = NamedTemporaryFile(delete=False, suffix=".nex")
        tmp.close()
        self.nex.write_to_file(tmp.name)
        assert os.path.isfile(tmp.name)
        
        with open(tmp.name, 'r') as handle:
            n = handle.read()
            
        assert re.search("#NEXUS", n)
        assert re.search("BEGIN DATA;", n)
        assert re.search("DIMENSIONS NTAX=3 NCHAR=2;", n)
        assert re.search("MATRIX", n)
        assert re.search("Latin\s+36", n)
        assert re.search("French\s+14", n)
        assert re.search("English\s+25", n)
        assert re.search("FORMAT.*MISSING\=(.+?)", n).groups()[0] == '?'
        assert re.search("FORMAT.*DATATYPE\=(\w+)\s", n).groups()[0] \
            == 'STANDARD'
        assert re.search('FORMAT.*SYMBOLS\="(\d+)";', n).groups()[0] \
            == '123456'
        
        os.unlink(tmp.name)        # cleanup

    def test_write_as_table(self):
        content = self.nex.write_as_table()
        assert re.search("Latin\s+36", content)
        assert re.search("French\s+14", content)
        assert re.search("English\s+25", content)
        assert len(content.split("\n")) == 3
        
    def test_write_as_table_with_polymorphoc(self):
        self.nex.add('French', 'char1', '2')
        content = self.nex.write_as_table()
        assert re.search("Latin\s+36", content)
        assert re.search("French\s+\(12\)4", content)
        assert re.search("English\s+25", content)
        assert len(content.split("\n")) == 3
Example #4
0
  def read_file(self):

    book = xlrd.open_workbook(self.input_file)
    sh = book.sheet_by_index(0)

    nw = NexusWriter()

    self.nrows = sh.nrows
    self.ncols = sh.ncols

    nw.add_comment("Morphobank generated Nexus File")
  
    custom_block = "\n\nBEGIN VERIFIED_TAXA;\n"
    custom_block += "Dimensions ntax=" + str(self.nrows) + " nchar=4;\n"

    # Species List Taxa
    for rx in range(self.nrows):
      if rx:
    
        taxon_name = str(sh.cell_value(rowx=rx, colx=0)).strip()
   
        verified_taxa = verifyTaxa(taxon_name)
        verified_name = None
        
        if verified_taxa:
          for taxa in verified_taxa:
            verified_name = taxa['name_string'].lower()
            custom_block += taxon_name + "    " + taxa['name_string'] + "    " + taxa['match_value'] + "    " + taxa['datasource'] + "\n"
        else:
          custom_block += taxon_name + "\n"
    
        for cx in range(self.ncols):
          if cx:

            if is_number( sh.cell_value(rowx=rx, colx=cx)):
              cell_value = int(sh.cell_value(rowx=rx, colx=cx))
            else:
              cell_value = sh.cell_value(rowx=rx, colx=cx)
              cell_value = cell_value.replace("{", "(")
              cell_value = cell_value.replace("}", ")")

            if verified_name:
              nw.add(verified_name, cx, cell_value)
            else:
              nw.add(taxon_name, cx, cell_value)
  
    custom_block += ";\n"
    custom_block += "END;\n"

    self.custom_block = custom_block

    # keep the upload path and filename, but change extension
    file_parts = self.input_file.split('.')
    output_file = file_parts[0] + '.nex'
    nw.write_to_file(filename=file_parts[0] + '.nex', interleave=False, charblock=True)

    ## quick append Custom Block 
    nexus_file = open(output_file, 'a')
    nexus_file.write(custom_block)
    nexus_file.close()

    return output_file