def load(self, filename):
    """
    Load data from either an XML or a TSV file.

    A file whose extension is exactly '.xml' is parsed as XML: the table
    name is set to the tag name of the document element, and every 'row'
    element becomes one table row whose cells are the element's attributes.
    Attribute names that are not yet in self.attrs() are registered as new
    columns, and the rows already stored are padded with an empty Value().

    Any other extension is read as tab-separated values.  One column named
    '<untitled>N' is registered per cell of the first line, then every line
    is passed to addrow().  An empty file adds neither columns nor rows.

    :param filename: path of the file to read.
    """
    with open(filename, 'r') as f:
        _, ext = os.path.splitext(filename)
        if ext == '.xml':
            # Parse the XML file (the ``with`` block closes the handle).
            dom = parse(f)
            self.name = dom.documentElement.tagName
            for xmlrow in dom.getElementsByTagName('row'):
                attr = xmlrow.attributes
                for i in range(attr.length):
                    item = attr.item(i)
                    if item.name not in self.attrs():
                        # Add column to the table, typed with whatever
                        # Value reports for the first value seen.
                        elt = Value(item.value)
                        index = self.attrStore.addAttr(item.name,
                                                       elt.getType())
                        self.columns.append(index)
                        # Keep every stored row as wide as the table.
                        for r in self.data:
                            r.append(Value())
                row = [unicode(xmlrow.getAttribute(name))
                       for name in self.attrs()]
                self.addrow(row)
        else:
            # Assume the input is a TSV file.  csv.reader is a one-shot
            # iterator without len() or indexing, so materialise the rows
            # before inspecting the first one.
            rows = list(csv.reader(f, delimiter='\t'))
            if rows:
                for i in range(len(rows[0])):
                    # NOTE(review): registered the same way as in the XML
                    # branch (addAttr + self.columns) so that addrow()'s
                    # length check can hold -- confirm attrStore offers no
                    # other registration entry point.
                    index = self.attrStore.addAttr('<untitled>' + str(i),
                                                   None)
                    self.columns.append(index)
                    for r in self.data:
                        r.append(Value())
            for row in rows:
                self.addrow([cell.decode('unicode-escape') for cell in row])
def addrow(self, strrow):
    """
    Append one row, given as a sequence of raw cell values, to self.data.

    strrow must hold exactly one entry per column in self.columns.  Each
    entry is wrapped in a Value and checked against the column's type:

    * a column whose type is still NoneType adopts the cell's type;
    * a cell whose type matches the column, or is NoneType, is stored as is;
    * any other cell is rebuilt with Value(val=...), and a column that is
      not already unicode has its non-NoneType cells rebuilt from their
      unicode() form before the column type is set to unicode.
    """
    assert len(strrow) == len(self.columns)
    none_type = type(None)
    cells = []
    for col, raw in enumerate(strrow):
        parsed = Value(raw)

        if self.getType(col) is none_type:
            # Initialize type
            self.setType(col, parsed.getType())
            cells.append(parsed)
            continue

        if (parsed.getType() == self.getType(col)
                or parsed.getType() is none_type):
            cells.append(parsed)
            continue

        # Type clash: keep this cell through the ``val=`` constructor.
        cells.append(Value(val=raw))
        if self.getType(col) != unicode:
            # Convert all other values in the column back to string
            # TODO : handle more fine-grained fallback (eg Float to Int)
            for existing in self.data:
                if existing[col].getType() is not none_type:
                    existing[col] = Value(val=unicode(existing[col]))
            self.setType(col, unicode)

    self.data.append(cells)
def addrow(self, strrow):
    """
    Wrap every entry of strrow in a Value and append the row to self.data.

    The row must be as long as self.columns.  For each column index:

    * if the column type is still NoneType, it is set to the new cell's
      type;
    * if the cell's type differs from the column type and is not NoneType,
      the cell is rebuilt with Value(val=...); unless the column is already
      unicode, its stored non-NoneType cells are rebuilt from their
      unicode() form and the column type becomes unicode;
    * otherwise the cell is stored unchanged.
    """
    assert len(strrow) == len(self.columns)
    NoneType = type(None)
    new_row = []
    for idx, text in enumerate(strrow):
        cell = Value(text)
        if self.getType(idx) is NoneType:
            # Initialize type
            self.setType(idx, cell.getType())
        elif not (cell.getType() == self.getType(idx)
                  or cell.getType() is NoneType):
            # Type clash: fall back to the ``val=`` constructor.
            cell = Value(val=text)
            if self.getType(idx) != unicode:
                # Convert all other values in the column back to string
                # TODO : handle more fine-grained fallback (eg Float to Int)
                for stored in self.data:
                    if stored[idx].getType() is not NoneType:
                        stored[idx] = Value(val=unicode(stored[idx]))
                self.setType(idx, unicode)
        new_row.append(cell)
    self.data.append(new_row)
def load(self, filename):
    """
    Load data from either an XML or a TSV file.

    A file whose extension is exactly '.xml' is parsed as XML: the table
    name is set to the tag name of the document element, and every 'row'
    element becomes one table row whose cells are the element's attributes.
    Attribute names that are not yet in self.attrs() are registered as new
    columns, and the rows already stored are padded with an empty Value().

    Any other extension is read as tab-separated values.  One column named
    '<untitled>N' is registered per cell of the first line, then every line
    is passed to addrow().  An empty file adds neither columns nor rows.

    :param filename: path of the file to read.
    """
    with open(filename, 'r') as f:
        _, ext = os.path.splitext(filename)
        if ext == '.xml':
            # Parse the XML file (the ``with`` block closes the handle).
            dom = parse(f)
            self.name = dom.documentElement.tagName
            for xmlrow in dom.getElementsByTagName('row'):
                attr = xmlrow.attributes
                for i in range(attr.length):
                    item = attr.item(i)
                    if item.name not in self.attrs():
                        # Add column to the table, typed with whatever
                        # Value reports for the first value seen.
                        elt = Value(item.value)
                        index = self.attrStore.addAttr(
                            item.name, elt.getType())
                        self.columns.append(index)
                        # Keep every stored row as wide as the table.
                        for r in self.data:
                            r.append(Value())
                row = [
                    unicode(xmlrow.getAttribute(name))
                    for name in self.attrs()
                ]
                self.addrow(row)
        else:
            # Assume the input is a TSV file.  csv.reader is a one-shot
            # iterator without len() or indexing, so materialise the rows
            # before inspecting the first one.
            rows = list(csv.reader(f, delimiter='\t'))
            if rows:
                for i in range(len(rows[0])):
                    # NOTE(review): registered the same way as in the XML
                    # branch (addAttr + self.columns) so that addrow()'s
                    # length check can hold -- confirm attrStore offers no
                    # other registration entry point.
                    index = self.attrStore.addAttr(
                        '<untitled>' + str(i), None)
                    self.columns.append(index)
                    for r in self.data:
                        r.append(Value())
            for row in rows:
                self.addrow(
                    [cell.decode('unicode-escape') for cell in row])